From c6893613b177dcebe3d9add1f513ab76710da59f Mon Sep 17 00:00:00 2001 From: sax Date: Thu, 13 Jun 2024 12:32:36 +0200 Subject: [PATCH] updates CI ci:debug --- src/hope_dedup_engine/__init__.py | 5 +- src/hope_dedup_engine/apps/api/admin.py | 7 +- src/hope_dedup_engine/apps/api/auth.py | 4 +- .../apps/api/models/__init__.py | 6 +- src/hope_dedup_engine/apps/api/models/auth.py | 4 +- .../apps/api/models/deduplication.py | 33 ++----- src/hope_dedup_engine/apps/api/serializers.py | 15 +--- src/hope_dedup_engine/apps/api/urls.py | 25 ++---- src/hope_dedup_engine/apps/api/views.py | 80 ++++------------- .../core/management/commands/createsystem.py | 4 +- .../apps/core/management/commands/env.py | 24 ++--- .../apps/core/management/commands/upgrade.py | 13 +-- .../apps/faces/celery_tasks.py | 9 +- .../apps/faces/utils/celery_utils.py | 8 +- .../apps/faces/utils/duplication_detector.py | 90 ++++--------------- .../apps/faces/validators.py | 4 +- src/hope_dedup_engine/apps/security/models.py | 4 +- src/hope_dedup_engine/apps/social/pipeline.py | 4 +- src/hope_dedup_engine/config/__init__.py | 76 +++------------- .../config/fragments/constance.py | 6 +- src/hope_dedup_engine/config/fragments/csp.py | 26 +----- src/hope_dedup_engine/state.py | 11 +-- src/hope_dedup_engine/utils/http.py | 4 +- src/hope_dedup_engine/utils/security.py | 5 +- 24 files changed, 91 insertions(+), 376 deletions(-) diff --git a/src/hope_dedup_engine/__init__.py b/src/hope_dedup_engine/__init__.py index 01518441..0e2df472 100644 --- a/src/hope_dedup_engine/__init__.py +++ b/src/hope_dedup_engine/__init__.py @@ -1,5 +1,6 @@ -# from hope_dedup_engine.config.celery import app as celery_app +from hope_dedup_engine.config.celery import app as celery_app + VERSION = __version__ = "0.1.0" -# __all__ = ("celery_app",) +__all__ = ("celery_app",) diff --git a/src/hope_dedup_engine/apps/api/admin.py b/src/hope_dedup_engine/apps/api/admin.py index 7753cfe2..9f384e0a 100644 --- a/src/hope_dedup_engine/apps/api/admin.py +++ b/src/hope_dedup_engine/apps/api/admin.py @@ -1,11 +1,6 @@ from django.contrib import admin -from hope_dedup_engine.apps.api.models import ( - DeduplicationSet, - Duplicate, - HDEToken, - Image, -) +from hope_dedup_engine.apps.api.models import DeduplicationSet, Duplicate, HDEToken, Image admin.site.register(DeduplicationSet) admin.site.register(Duplicate) diff --git a/src/hope_dedup_engine/apps/api/auth.py b/src/hope_dedup_engine/apps/api/auth.py index a63dd6c2..4a78ffcc 100644 --- a/src/hope_dedup_engine/apps/api/auth.py +++ b/src/hope_dedup_engine/apps/api/auth.py @@ -14,9 +14,7 @@ def has_permission(self, request: Request, view: View) -> bool: class UserAndDeduplicationSetAreOfTheSameSystem(BasePermission): def has_permission(self, request: Request, view: View) -> bool: - if deduplication_set_pk := view.kwargs.get( - "deduplication_set_pk" - ) or view.kwargs.get("pk"): + if deduplication_set_pk := view.kwargs.get("deduplication_set_pk") or view.kwargs.get("pk"): return DeduplicationSet.objects.filter( external_system=request.user.external_system, pk=deduplication_set_pk ).exists() diff --git a/src/hope_dedup_engine/apps/api/models/__init__.py b/src/hope_dedup_engine/apps/api/models/__init__.py index 40bdb2fa..571a4bfd 100644 --- a/src/hope_dedup_engine/apps/api/models/__init__.py +++ b/src/hope_dedup_engine/apps/api/models/__init__.py @@ -1,6 +1,2 @@ from hope_dedup_engine.apps.api.models.auth import HDEToken # noqa: F401 -from hope_dedup_engine.apps.api.models.deduplication import ( # noqa: F401 - DeduplicationSet, - Duplicate, - Image, -) +from hope_dedup_engine.apps.api.models.deduplication import DeduplicationSet, Duplicate, Image # noqa: F401 diff --git a/src/hope_dedup_engine/apps/api/models/auth.py b/src/hope_dedup_engine/apps/api/models/auth.py index 050a852b..025370bd 100644 --- a/src/hope_dedup_engine/apps/api/models/auth.py +++ b/src/hope_dedup_engine/apps/api/models/auth.py @@ -5,6 +5,4 @@ class HDEToken(Token): - user = models.ForeignKey( - settings.AUTH_USER_MODEL, related_name="auth_tokens", on_delete=models.CASCADE - ) + user = models.ForeignKey(settings.AUTH_USER_MODEL, related_name="auth_tokens", on_delete=models.CASCADE) diff --git a/src/hope_dedup_engine/apps/api/models/deduplication.py b/src/hope_dedup_engine/apps/api/models/deduplication.py index bbeb8cbd..4bce2d0e 100644 --- a/src/hope_dedup_engine/apps/api/models/deduplication.py +++ b/src/hope_dedup_engine/apps/api/models/deduplication.py @@ -12,10 +12,7 @@ class DeduplicationSet(models.Model): class State(models.IntegerChoices): CLEAN = 0, "Clean" # Deduplication set is created or already processed - DIRTY = ( - 1, - "Dirty", - ) # Images are added to deduplication set, but not yet processed + DIRTY = 1, "Dirty" # Images are added to deduplication set, but not yet processed PROCESSING = 2, "Processing" # Images are being processed ERROR = 3, "Error" # Error occurred @@ -30,19 +27,11 @@ class State(models.IntegerChoices): external_system = models.ForeignKey(ExternalSystem, on_delete=models.CASCADE) error = models.CharField(max_length=255, null=True, blank=True) created_by = models.ForeignKey( - settings.AUTH_USER_MODEL, - on_delete=models.CASCADE, - null=True, - blank=True, - related_name="+", + settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True, related_name="+" ) created_at = models.DateTimeField(auto_now_add=True) updated_by = models.ForeignKey( - settings.AUTH_USER_MODEL, - on_delete=models.CASCADE, - null=True, - blank=True, - related_name="+", + settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True, related_name="+" ) updated_at = models.DateTimeField(auto_now=True) notification_url = models.CharField(max_length=255, null=True, blank=True) @@ -54,11 +43,7 @@ class Image(models.Model): reference_pk = models.CharField(max_length=REFERENCE_PK_LENGTH) filename = models.CharField(max_length=255) created_by = models.ForeignKey( - settings.AUTH_USER_MODEL, - on_delete=models.CASCADE, - null=True, - blank=True, - related_name="+", + settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True, related_name="+" ) created_at = models.DateTimeField(auto_now_add=True) @@ -78,15 +63,9 @@ class IgnoredKeyPair(models.Model): second_reference_pk = models.CharField(max_length=REFERENCE_PK_LENGTH) class Meta: - unique_together = ( - "deduplication_set", - "first_reference_pk", - "second_reference_pk", - ) + unique_together = "deduplication_set", "first_reference_pk", "second_reference_pk" @override def save(self, **kwargs: Any) -> None: - self.first_reference_pk, self.second_reference_pk = sorted( - (self.first_reference_pk, self.second_reference_pk) - ) + self.first_reference_pk, self.second_reference_pk = sorted((self.first_reference_pk, self.second_reference_pk)) super().save(**kwargs) diff --git a/src/hope_dedup_engine/apps/api/serializers.py b/src/hope_dedup_engine/apps/api/serializers.py index 4ef1087a..2227e72a 100644 --- a/src/hope_dedup_engine/apps/api/serializers.py +++ b/src/hope_dedup_engine/apps/api/serializers.py @@ -1,11 +1,7 @@ from rest_framework import serializers from hope_dedup_engine.apps.api.models import DeduplicationSet -from hope_dedup_engine.apps.api.models.deduplication import ( - Duplicate, - IgnoredKeyPair, - Image, -) +from hope_dedup_engine.apps.api.models.deduplication import Duplicate, IgnoredKeyPair, Image class DeduplicationSetSerializer(serializers.ModelSerializer): @@ -14,14 +10,7 @@ class DeduplicationSetSerializer(serializers.ModelSerializer): class Meta: model = DeduplicationSet exclude = ("deleted",) - read_only_fields = ( - "external_system", - "created_at", - "created_by", - "deleted", - "updated_at", - "updated_by", - ) + read_only_fields = "external_system", "created_at", "created_by", "deleted", "updated_at", "updated_by" class ImageSerializer(serializers.ModelSerializer): diff --git a/src/hope_dedup_engine/apps/api/urls.py b/src/hope_dedup_engine/apps/api/urls.py index 8bda942a..fb83633a 100644 --- a/src/hope_dedup_engine/apps/api/urls.py +++ b/src/hope_dedup_engine/apps/api/urls.py @@ -20,25 +20,12 @@ ) router = routers.SimpleRouter() -router.register( - DEDUPLICATION_SET_LIST, DeduplicationSetViewSet, basename=DEDUPLICATION_SET_LIST -) +router.register(DEDUPLICATION_SET_LIST, DeduplicationSetViewSet, basename=DEDUPLICATION_SET_LIST) -deduplication_sets_router = nested_routers.NestedSimpleRouter( - router, DEDUPLICATION_SET_LIST, lookup=DEDUPLICATION_SET -) +deduplication_sets_router = nested_routers.NestedSimpleRouter(router, DEDUPLICATION_SET_LIST, lookup=DEDUPLICATION_SET) deduplication_sets_router.register(IMAGE_LIST, ImageViewSet, basename=IMAGE_LIST) -deduplication_sets_router.register( - BULK_IMAGE_LIST, BulkImageViewSet, basename=BULK_IMAGE_LIST -) -deduplication_sets_router.register( - DUPLICATE_LIST, DuplicateViewSet, basename=DUPLICATE_LIST -) -deduplication_sets_router.register( - IGNORED_KEYS_LIST, IgnoredKeyPairViewSet, basename=IGNORED_KEYS_LIST -) +deduplication_sets_router.register(BULK_IMAGE_LIST, BulkImageViewSet, basename=BULK_IMAGE_LIST) +deduplication_sets_router.register(DUPLICATE_LIST, DuplicateViewSet, basename=DUPLICATE_LIST) +deduplication_sets_router.register(IGNORED_KEYS_LIST, IgnoredKeyPairViewSet, basename=IGNORED_KEYS_LIST) -urlpatterns = [ - path("", include(router.urls)), - path("", include(deduplication_sets_router.urls)), -] +urlpatterns = [path("", include(router.urls)), path("", include(deduplication_sets_router.urls))] diff --git a/src/hope_dedup_engine/apps/api/views.py b/src/hope_dedup_engine/apps/api/views.py index fab5fd17..eb07413e 100644 --- a/src/hope_dedup_engine/apps/api/views.py +++ b/src/hope_dedup_engine/apps/api/views.py @@ -18,16 +18,9 @@ HDETokenAuthentication, UserAndDeduplicationSetAreOfTheSameSystem, ) -from hope_dedup_engine.apps.api.const import ( - DEDUPLICATION_SET_FILTER, - DEDUPLICATION_SET_PARAM, -) +from hope_dedup_engine.apps.api.const import DEDUPLICATION_SET_FILTER, DEDUPLICATION_SET_PARAM from hope_dedup_engine.apps.api.models import DeduplicationSet -from hope_dedup_engine.apps.api.models.deduplication import ( - Duplicate, - IgnoredKeyPair, - Image, -) +from hope_dedup_engine.apps.api.models.deduplication import Duplicate, IgnoredKeyPair, Image from hope_dedup_engine.apps.api.serializers import ( DeduplicationSetSerializer, DuplicateSerializer, @@ -43,29 +36,17 @@ class DeduplicationSetViewSet( - mixins.ListModelMixin, - mixins.CreateModelMixin, - mixins.DestroyModelMixin, - viewsets.GenericViewSet, + mixins.ListModelMixin, mixins.CreateModelMixin, mixins.DestroyModelMixin, viewsets.GenericViewSet ): authentication_classes = (HDETokenAuthentication,) - permission_classes = ( - IsAuthenticated, - AssignedToExternalSystem, - UserAndDeduplicationSetAreOfTheSameSystem, - ) + permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem serializer_class = DeduplicationSetSerializer def get_queryset(self) -> QuerySet: - return DeduplicationSet.objects.filter( - external_system=self.request.user.external_system, deleted=False - ) + return DeduplicationSet.objects.filter(external_system=self.request.user.external_system, deleted=False) def perform_create(self, serializer: Serializer) -> None: - serializer.save( - created_by=self.request.user, - external_system=self.request.user.external_system, - ) + serializer.save(created_by=self.request.user, external_system=self.request.user.external_system) def perform_destroy(self, instance: DeduplicationSet) -> None: instance.updated_by = self.request.user @@ -89,9 +70,7 @@ def process(self, request: Request, pk: UUID | None = None) -> Response: self._start_processing(deduplication_set) return Response({MESSAGE: STARTED}) case DeduplicationSet.State.PROCESSING: - return Response( - {MESSAGE: ALREADY_PROCESSING}, status=status.HTTP_400_BAD_REQUEST - ) + return Response({MESSAGE: ALREADY_PROCESSING}, status=status.HTTP_400_BAD_REQUEST) class ImageViewSet( @@ -102,11 +81,7 @@ class ImageViewSet( viewsets.GenericViewSet, ): authentication_classes = (HDETokenAuthentication,) - permission_classes = ( - IsAuthenticated, - AssignedToExternalSystem, - UserAndDeduplicationSetAreOfTheSameSystem, - ) + permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem serializer_class = ImageSerializer queryset = Image.objects.all() parent_lookup_kwargs = { @@ -138,18 +113,14 @@ def __setitem__(self, key: str, value: Any) -> None: class WrapRequestDataMixin: - def initialize_request( - self, request: Request, *args: Any, **kwargs: Any - ) -> Request: + def initialize_request(self, request: Request, *args: Any, **kwargs: Any) -> Request: request = super().initialize_request(request, *args, **kwargs) request._full_data = ListDataWrapper(request.data) return request class UnwrapRequestDataMixin: - def initialize_request( - self, request: Request, *args: Any, **kwargs: Any - ) -> Request: + def initialize_request(self, request: Request, *args: Any, **kwargs: Any) -> Request: request = super().initialize_request(request, *args, **kwargs) request._full_data = request._full_data.data return request @@ -165,11 +136,7 @@ class BulkImageViewSet( viewsets.GenericViewSet, ): authentication_classes = (HDETokenAuthentication,) - permission_classes = ( - IsAuthenticated, - AssignedToExternalSystem, - UserAndDeduplicationSetAreOfTheSameSystem, - ) + permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem serializer_class = ImageSerializer queryset = Image.objects.all() parent_lookup_kwargs = { @@ -181,9 +148,7 @@ def get_serializer(self, *args: Any, **kwargs: Any) -> Serializer: def perform_create(self, serializer: Serializer) -> None: super().perform_create(serializer) - if deduplication_set := ( - serializer.instance[0].deduplication_set if serializer.instance else None - ): + if deduplication_set := serializer.instance[0].deduplication_set if serializer.instance else None: deduplication_set.updated_by = self.request.user deduplication_set.save() @@ -196,15 +161,9 @@ def clear(self, request: Request, deduplication_set_pk: str) -> Response: return Response(status=status.HTTP_204_NO_CONTENT) -class DuplicateViewSet( - nested_viewsets.NestedViewSetMixin, mixins.ListModelMixin, viewsets.GenericViewSet -): +class DuplicateViewSet(nested_viewsets.NestedViewSetMixin, mixins.ListModelMixin, viewsets.GenericViewSet): authentication_classes = (HDETokenAuthentication,) - permission_classes = ( - IsAuthenticated, - AssignedToExternalSystem, - UserAndDeduplicationSetAreOfTheSameSystem, - ) + permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem serializer_class = DuplicateSerializer queryset = Duplicate.objects.all() parent_lookup_kwargs = { @@ -213,17 +172,10 @@ class DuplicateViewSet( class IgnoredKeyPairViewSet( - nested_viewsets.NestedViewSetMixin, - mixins.ListModelMixin, - mixins.CreateModelMixin, - viewsets.GenericViewSet, + nested_viewsets.NestedViewSetMixin, mixins.ListModelMixin, mixins.CreateModelMixin, viewsets.GenericViewSet ): authentication_classes = (HDETokenAuthentication,) - permission_classes = ( - IsAuthenticated, - AssignedToExternalSystem, - UserAndDeduplicationSetAreOfTheSameSystem, - ) + permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem serializer_class = IgnoredKeyPairSerializer queryset = IgnoredKeyPair.objects.all() parent_lookup_kwargs = { diff --git a/src/hope_dedup_engine/apps/core/management/commands/createsystem.py b/src/hope_dedup_engine/apps/core/management/commands/createsystem.py index f9dafbac..fbe2707d 100644 --- a/src/hope_dedup_engine/apps/core/management/commands/createsystem.py +++ b/src/hope_dedup_engine/apps/core/management/commands/createsystem.py @@ -10,9 +10,7 @@ def add_arguments(self, parser): parser.add_argument("name") def handle(self, *args, **options): - system, created = ExternalSystem.objects.get_or_create( - name=(name := options["name"]) - ) + system, created = ExternalSystem.objects.get_or_create(name=(name := options["name"])) if created: self.stdout.write(self.style.SUCCESS(f'"{name}" system created.')) else: diff --git a/src/hope_dedup_engine/apps/core/management/commands/env.py b/src/hope_dedup_engine/apps/core/management/commands/env.py index a1bfff67..782c94f4 100644 --- a/src/hope_dedup_engine/apps/core/management/commands/env.py +++ b/src/hope_dedup_engine/apps/core/management/commands/env.py @@ -33,26 +33,14 @@ def add_arguments(self, parser: "CommandParser") -> None: default="export {key}={value}", help="Check env for variable availability (default: 'export {key}=\"{value}\"')", ) - parser.add_argument( - "--develop", action="store_true", help="Display development values" - ) - parser.add_argument( - "--config", action="store_true", help="Only list changed values" - ) + parser.add_argument("--develop", action="store_true", help="Display development values") + parser.add_argument("--config", action="store_true", help="Only list changed values") parser.add_argument("--diff", action="store_true", help="Mark changed values") parser.add_argument( - "--check", - action="store_true", - dest="check", - default=False, - help="Check env for variable availability", + "--check", action="store_true", dest="check", default=False, help="Check env for variable availability" ) parser.add_argument( - "--ignore-errors", - action="store_true", - dest="ignore_errors", - default=False, - help="Do not fail", + "--ignore-errors", action="store_true", dest="ignore_errors", default=False, help="Do not fail" ) def handle(self, *args: "Any", **options: "Any") -> None: @@ -74,9 +62,7 @@ def handle(self, *args: "Any", **options: "Any") -> None: else: value: Any = env.get_value(k) - line: str = pattern.format( - key=k, value=clean(value), help=help, default=default - ) + line: str = pattern.format(key=k, value=clean(value), help=help, default=default) if options["diff"]: if value != default: line = self.style.SUCCESS(line) diff --git a/src/hope_dedup_engine/apps/core/management/commands/upgrade.py b/src/hope_dedup_engine/apps/core/management/commands/upgrade.py index 513e2f01..a6e09ff0 100644 --- a/src/hope_dedup_engine/apps/core/management/commands/upgrade.py +++ b/src/hope_dedup_engine/apps/core/management/commands/upgrade.py @@ -89,9 +89,7 @@ def get_options(self, options: dict[str, Any]) -> None: self.debug = options["debug"] self.admin_email = str(options["admin_email"] or env("ADMIN_EMAIL", "")) - self.admin_password = str( - options["admin_password"] or env("ADMIN_PASSWORD", "") - ) + self.admin_password = str(options["admin_password"] or env("ADMIN_PASSWORD", "")) def halt(self, e: Exception) -> None: self.stdout.write(str(e), style_func=self.style.ERROR) @@ -125,9 +123,7 @@ def handle(self, *args: Any, **options: Any) -> None: # noqa: C901 call_command("check", deploy=True, verbosity=self.verbosity - 1) if self.static: static_root = Path(env("STATIC_ROOT")) - echo( - f"Run collectstatic to: '{static_root}' - '{static_root.absolute()}" - ) + echo(f"Run collectstatic to: '{static_root}' - '{static_root.absolute()}") if not static_root.exists(): static_root.mkdir(parents=True) call_command("collectstatic", **extra) @@ -148,10 +144,7 @@ def handle(self, *args: Any, **options: Any) -> None: # noqa: C901 style_func=self.style.WARNING, ) else: - echo( - f"Creating superuser: {self.admin_email}", - style_func=self.style.WARNING, - ) + echo(f"Creating superuser: {self.admin_email}", style_func=self.style.WARNING) validate_email(self.admin_email) os.environ["DJANGO_SUPERUSER_USERNAME"] = self.admin_email os.environ["DJANGO_SUPERUSER_EMAIL"] = self.admin_email diff --git a/src/hope_dedup_engine/apps/faces/celery_tasks.py b/src/hope_dedup_engine/apps/faces/celery_tasks.py index 2fec0d72..2c156cfb 100644 --- a/src/hope_dedup_engine/apps/faces/celery_tasks.py +++ b/src/hope_dedup_engine/apps/faces/celery_tasks.py @@ -9,9 +9,7 @@ @shared_task(bind=True, soft_time_limit=0.5 * 60 * 60, time_limit=1 * 60 * 60) @task_lifecycle(name="Deduplicate", ttl=1 * 60 * 60) # TODO: Use DeduplicationSet objects as input to deduplication pipeline -def deduplicate( - self, filenames: tuple[str], ignore_pairs: tuple[tuple[str, str]] = tuple() -) -> tuple[tuple[str]]: +def deduplicate(self, filenames: tuple[str], ignore_pairs: tuple[tuple[str, str]] = tuple()) -> tuple[tuple[str]]: """ Deduplicate a set of filenames, ignoring any specified pairs of filenames. @@ -27,8 +25,5 @@ def deduplicate( dd = DuplicationDetector(filenames, ignore_pairs) return dd.find_duplicates() except Exception as e: - self.update_state( - state=states.FAILURE, - meta={"exc_message": str(e), "traceback": traceback.format_exc()}, - ) + self.update_state(state=states.FAILURE, meta={"exc_message": str(e), "traceback": traceback.format_exc()}) raise e diff --git a/src/hope_dedup_engine/apps/faces/utils/celery_utils.py b/src/hope_dedup_engine/apps/faces/utils/celery_utils.py index 6aba6ced..eec34e9a 100644 --- a/src/hope_dedup_engine/apps/faces/utils/celery_utils.py +++ b/src/hope_dedup_engine/apps/faces/utils/celery_utils.py @@ -21,9 +21,7 @@ def wrapper(self, *args, **kwargs) -> any: ignore_pairs = args[1] if args else kwargs.get("ignore_pairs") lock_name: str = f"{name}_{_get_hash(filenames, ignore_pairs)}" if not _acquire_lock(lock_name, ttl): - logger.info( - f"Task {name} with brocker lock {lock_name} is already running." - ) + logger.info(f"Task {name} with brocker lock {lock_name} is already running.") return None try: @@ -51,8 +49,6 @@ def _release_lock(lock_name: str) -> None: def _get_hash(filenames: tuple[str], ignore_pairs: tuple[tuple[str, str]]) -> str: fn_str: str = ",".join(sorted(filenames)) - ip_sorted = sorted( - (min(item1, item2), max(item1, item2)) for item1, item2 in ignore_pairs - ) + ip_sorted = sorted((min(item1, item2), max(item1, item2)) for item1, item2 in ignore_pairs) ip_str = ",".join(f"{item1},{item2}" for item1, item2 in ip_sorted) return hashlib.sha256(f"{fn_str}{ip_str}".encode()).hexdigest() diff --git a/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py b/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py index 5b9257e7..c0683943 100644 --- a/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py +++ b/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py @@ -11,11 +11,7 @@ import numpy as np from constance import config -from hope_dedup_engine.apps.core.storage import ( - CV2DNNStorage, - HDEAzureStorage, - HOPEAzureStorage, -) +from hope_dedup_engine.apps.core.storage import CV2DNNStorage, HDEAzureStorage, HOPEAzureStorage class DuplicationDetector: @@ -36,9 +32,7 @@ class FaceEncodingsConfig: logger: logging.Logger = logging.getLogger(__name__) - def __init__( - self, filenames: tuple[str], ignore_pairs: tuple[str, str] = tuple() - ) -> None: + def __init__(self, filenames: tuple[str], ignore_pairs: tuple[str, str] = tuple()) -> None: """ Initialize the DuplicationDetector with the given filenames. @@ -89,13 +83,7 @@ def _set_net(self, storage: CV2DNNStorage) -> cv2.dnn_Net: return net def _get_shape(self) -> dict[str, int]: - pattern = ( - r"input_shape\s*\{\s*" - r"dim:\s*(\d+)\s*" - r"dim:\s*(\d+)\s*" - r"dim:\s*(\d+)\s*" - r"dim:\s*(\d+)\s*\}" - ) + pattern = r"input_shape\s*\{\s*" r"dim:\s*(\d+)\s*" r"dim:\s*(\d+)\s*" r"dim:\s*(\d+)\s*" r"dim:\s*(\d+)\s*\}" with open(settings.PROTOTXT_FILE, "r") as file: if match := re.search(pattern, file.read()): return { @@ -107,21 +95,15 @@ def _get_shape(self) -> dict[str, int]: else: raise ValueError("Could not find input_shape in prototxt file.") - def _get_pairs_to_ignore( - self, ignore: tuple[tuple[str, str]] - ) -> set[tuple[str, str]]: + def _get_pairs_to_ignore(self, ignore: tuple[tuple[str, str]]) -> set[tuple[str, str]]: ignore = tuple(tuple(pair) for pair in ignore) if not ignore: return set() if all( - isinstance(pair, tuple) - and len(pair) == 2 - and all(isinstance(item, str) and item for item in pair) + isinstance(pair, tuple) and len(pair) == 2 and all(isinstance(item, str) and item for item in pair) for pair in ignore ): - return {(item1, item2) for item1, item2 in ignore} | { - (item2, item1) for item1, item2 in ignore - } + return {(item1, item2) for item1, item2 in ignore} | {(item2, item1) for item1, item2 in ignore} elif len(ignore) == 2 and all(isinstance(item, str) for item in ignore): return {(ignore[0], ignore[1]), (ignore[1], ignore[0])} else: @@ -135,9 +117,7 @@ def _encodings_filename(self, filename: str) -> str: def _has_encodings(self, filename: str) -> bool: return self.storages["encoded"].exists(self._encodings_filename(filename)) - def _get_face_detections_dnn( - self, filename: str - ) -> list[tuple[int, int, int, int]]: + def _get_face_detections_dnn(self, filename: str) -> list[tuple[int, int, int, int]]: face_regions: list[tuple[int, int, int, int]] = [] try: with self.storages["images"].open(filename, "rb") as img_file: @@ -148,16 +128,9 @@ def _get_face_detections_dnn( # Create a blob (4D tensor) from the image blob = cv2.dnn.blobFromImage( image=cv2.resize( - image, - dsize=( - self.blob_from_image_cfg.shape["height"], - self.blob_from_image_cfg.shape["width"], - ), - ), - size=( - self.blob_from_image_cfg.shape["height"], - self.blob_from_image_cfg.shape["width"], + image, dsize=(self.blob_from_image_cfg.shape["height"], self.blob_from_image_cfg.shape["width"]) ), + size=(self.blob_from_image_cfg.shape["height"], self.blob_from_image_cfg.shape["width"]), scalefactor=self.blob_from_image_cfg.scale_factor, mean=self.blob_from_image_cfg.mean_values, ) @@ -172,26 +145,17 @@ def _get_face_detections_dnn( confidence = detections[0, 0, i, 2] # Filter out weak detections by ensuring the confidence is greater than the minimum confidence if confidence > self.face_detection_confidence: - box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype( - "int" - ) + box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype("int") boxes.append(box) confidences.append(confidence) if boxes: # Apply non-maxima suppression to suppress weak, overlapping bounding boxes - indices = cv2.dnn.NMSBoxes( - boxes, - confidences, - self.face_detection_confidence, - self.nms_threshold, - ) + indices = cv2.dnn.NMSBoxes(boxes, confidences, self.face_detection_confidence, self.nms_threshold) if indices is not None: for i in indices: face_regions.append(tuple(boxes[i])) except Exception as e: - self.logger.exception( - "Error processing face detection for image %s", filename - ) + self.logger.exception("Error processing face detection for image %s", filename) raise e return face_regions @@ -229,19 +193,13 @@ def _encode_face(self, filename: str) -> None: encodings.extend(face_encodings) else: self.logger.error("Invalid face region %s", region) - with self.storages["encoded"].open( - self._encodings_filename(filename), "wb" - ) as f: + with self.storages["encoded"].open(self._encodings_filename(filename), "wb") as f: np.save(f, encodings) except Exception as e: - self.logger.exception( - "Error processing face encodings for image %s", filename - ) + self.logger.exception("Error processing face encodings for image %s", filename) raise e - def _get_duplicated_groups( - self, checked: set[tuple[str, str, float]] - ) -> tuple[tuple[str]]: + def _get_duplicated_groups(self, checked: set[tuple[str, str, float]]) -> tuple[tuple[str]]: # Dictionary to store connections between paths where distances are less than the threshold groups = [] connections = defaultdict(set) @@ -258,14 +216,10 @@ def _get_duplicated_groups( # Try to expand the group ensuring each new path is duplicated to all in the group while queue: neighbor = queue.pop(0) - if neighbor not in new_group and all( - neighbor in connections[member] for member in new_group - ): + if neighbor not in new_group and all(neighbor in connections[member] for member in new_group): new_group.add(neighbor) # Add neighbors of the current neighbor, excluding those already in the group - queue.extend( - [n for n in connections[neighbor] if n not in new_group] - ) + queue.extend([n for n in connections[neighbor] if n not in new_group]) # Add the newly formed group to the list of groups groups.append(new_group) return tuple(map(tuple, groups)) @@ -290,18 +244,12 @@ def find_duplicates(self) -> tuple[tuple[str]]: min_distance = float("inf") for encoding1 in encodings1: if ( - current_min := min( - face_recognition.face_distance( - encodings2, encoding1 - ) - ) + current_min := min(face_recognition.face_distance(encodings2, encoding1)) ) < min_distance: min_distance = current_min checked.add((path1, path2, min_distance)) return self._get_duplicated_groups(checked) except Exception as e: - self.logger.exception( - "Error finding duplicates for images %s", self.filenames - ) + self.logger.exception("Error finding duplicates for images %s", self.filenames) raise e diff --git a/src/hope_dedup_engine/apps/faces/validators.py b/src/hope_dedup_engine/apps/faces/validators.py index d3f3f5bd..1b8288f4 100644 --- a/src/hope_dedup_engine/apps/faces/validators.py +++ b/src/hope_dedup_engine/apps/faces/validators.py @@ -8,9 +8,7 @@ def to_python(self, value): if len(values) != 3: raise ValueError("The tuple must have exactly three elements.") if not all(-255 <= v <= 255 for v in values): - raise ValueError( - "Each value in the tuple must be between -255 and 255." - ) + raise ValueError("Each value in the tuple must be between -255 and 255.") return values except Exception as e: raise ValidationError( diff --git a/src/hope_dedup_engine/apps/security/models.py b/src/hope_dedup_engine/apps/security/models.py index 044c1daa..8ed7506a 100644 --- a/src/hope_dedup_engine/apps/security/models.py +++ b/src/hope_dedup_engine/apps/security/models.py @@ -13,9 +13,7 @@ class ExternalSystem(models.Model): class User(SecurityMixin, AbstractUser): - external_system = models.ForeignKey( - ExternalSystem, on_delete=models.SET_NULL, null=True, blank=True - ) + external_system = models.ForeignKey(ExternalSystem, on_delete=models.SET_NULL, null=True, blank=True) class Meta: abstract = False diff --git a/src/hope_dedup_engine/apps/social/pipeline.py b/src/hope_dedup_engine/apps/social/pipeline.py index 51610b3d..aea7c84c 100644 --- a/src/hope_dedup_engine/apps/social/pipeline.py +++ b/src/hope_dedup_engine/apps/social/pipeline.py @@ -6,9 +6,7 @@ from social_core.backends.base import BaseAuth -def save_to_group( - backend: BaseAuth, user: Optional[User] = None, **kwargs: Any -) -> dict[str, Any]: +def save_to_group(backend: BaseAuth, user: Optional[User] = None, **kwargs: Any) -> dict[str, Any]: if user: grp = Group.objects.get(name=config.NEW_USER_DEFAULT_GROUP) user.groups.add(grp) diff --git a/src/hope_dedup_engine/config/__init__.py b/src/hope_dedup_engine/config/__init__.py index ccd74314..4cd90b32 100644 --- a/src/hope_dedup_engine/config/__init__.py +++ b/src/hope_dedup_engine/config/__init__.py @@ -5,9 +5,7 @@ from environ import Env if TYPE_CHECKING: - ConfigItem: TypeAlias = Union[ - Tuple[type, Any, str, Any], Tuple[type, Any, str], Tuple[type, Any] - ] + ConfigItem: TypeAlias = Union[Tuple[type, Any, str, Any], Tuple[type, Any, str], Tuple[type, Any]] DJANGO_HELP_BASE = "https://docs.djangoproject.com/en/5.0/ref/settings" @@ -22,14 +20,7 @@ class Group(Enum): NOT_SET = "<- not set ->" -EXPLICIT_SET = [ - "DATABASE_URL", - "SECRET_KEY", - "CACHE_URL", - "CELERY_BROKER_URL", - "MEDIA_ROOT", - "STATIC_ROOT", -] +EXPLICIT_SET = ["DATABASE_URL", "SECRET_KEY", "CACHE_URL", "CELERY_BROKER_URL", "MEDIA_ROOT", "STATIC_ROOT"] CONFIG: "Dict[str, ConfigItem]" = { "ADMIN_EMAIL": (str, "", "Initial user created at first deploy"), @@ -38,11 +29,7 @@ class Group(Enum): "AUTHENTICATION_BACKENDS": (list, [], setting("authentication-backends")), "CACHE_URL": (str, "redis://localhost:6379/0"), "CATCH_ALL_EMAIL": (str, "If set all the emails will be sent to this address"), - "CELERY_BROKER_URL": ( - str, - NOT_SET, - "https://docs.celeryq.dev/en/stable/django/first-steps-with-django.html", - ), + "CELERY_BROKER_URL": (str, NOT_SET, "https://docs.celeryq.dev/en/stable/django/first-steps-with-django.html"), "CELERY_TASK_ALWAYS_EAGER": ( bool, False, @@ -67,47 +54,21 @@ class Group(Enum): "postgres://127.0.0.1:5432/dedupe", ), "DEBUG": (bool, False, setting("debug"), True), - "EMAIL_BACKEND": ( - str, - "django.core.mail.backends.smtp.EmailBackend", - setting("email-backend"), - True, - ), + "EMAIL_BACKEND": (str, "django.core.mail.backends.smtp.EmailBackend", setting("email-backend"), True), "EMAIL_HOST": (str, "localhost", setting("email-host"), True), "EMAIL_HOST_USER": (str, "", setting("email-host-user"), True), "EMAIL_HOST_PASSWORD": (str, "", setting("email-host-password"), True), "EMAIL_PORT": (int, "25", setting("email-port"), True), - "EMAIL_SUBJECT_PREFIX": ( - str, - "[Hope-dedupe]", - setting("email-subject-prefix"), - True, - ), + "EMAIL_SUBJECT_PREFIX": (str, "[Hope-dedupe]", setting("email-subject-prefix"), True), "EMAIL_USE_LOCALTIME": (bool, False, setting("email-use-localtime"), True), "EMAIL_USE_TLS": (bool, False, setting("email-use-tls"), True), "EMAIL_USE_SSL": (bool, False, setting("email-use-ssl"), True), "EMAIL_TIMEOUT": (str, None, setting("email-timeout"), True), "LOGGING_LEVEL": (str, "CRITICAL", setting("logging-level")), - "FILE_STORAGE_DEFAULT": ( - str, - "django.core.files.storage.FileSystemStorage", - setting("storages"), - ), - "FILE_STORAGE_MEDIA": ( - str, - "django.core.files.storage.FileSystemStorage", - setting("storages"), - ), - "FILE_STORAGE_STATIC": ( - str, - "django.contrib.staticfiles.storage.StaticFilesStorage", - setting("storages"), - ), - "FILE_STORAGE_HOPE": ( - str, - "django.core.files.storage.FileSystemStorage", - setting("storages"), - ), + "FILE_STORAGE_DEFAULT": (str, "django.core.files.storage.FileSystemStorage", setting("storages")), + "FILE_STORAGE_MEDIA": (str, "django.core.files.storage.FileSystemStorage", setting("storages")), + "FILE_STORAGE_STATIC": (str, "django.contrib.staticfiles.storage.StaticFilesStorage", setting("storages")), + "FILE_STORAGE_HOPE": (str, "django.core.files.storage.FileSystemStorage", setting("storages")), "MEDIA_ROOT": (str, None, setting("media-root")), "MEDIA_URL": (str, "/media/", setting("media-url")), "ROOT_TOKEN": (str, "", ""), @@ -118,29 +79,16 @@ class Group(Enum): "SENTRY_DSN": (str, "", "Sentry DSN"), "SENTRY_ENVIRONMENT": (str, "production", "Sentry Environment"), "SENTRY_URL": (str, "", "Sentry server url"), - "SESSION_COOKIE_DOMAIN": ( - str, - "", - setting("std-setting-SESSION_COOKIE_DOMAIN"), - "localhost", - ), + "SESSION_COOKIE_DOMAIN": (str, "", setting("std-setting-SESSION_COOKIE_DOMAIN"), "localhost"), "SESSION_COOKIE_HTTPONLY": (bool, True, setting("session-cookie-httponly"), False), "SESSION_COOKIE_NAME": (str, "dedupe_session", setting("session-cookie-name")), "SESSION_COOKIE_PATH": (str, "/", setting("session-cookie-path")), "SESSION_COOKIE_SECURE": (bool, True, setting("session-cookie-secure"), False), - "SIGNING_BACKEND": ( - str, - "django.core.signing.TimestampSigner", - setting("signing-backend"), - ), + "SIGNING_BACKEND": (str, "django.core.signing.TimestampSigner", setting("signing-backend")), "SOCIAL_AUTH_LOGIN_URL": (str, "/login/", "", ""), "SOCIAL_AUTH_RAISE_EXCEPTIONS": (bool, False, "", True), "SOCIAL_AUTH_REDIRECT_IS_HTTPS": (bool, True, "", False), - "STATIC_FILE_STORAGE": ( - str, - "django.core.files.storage.FileSystemStorage", - setting("storages"), - ), + "STATIC_FILE_STORAGE": (str, "django.core.files.storage.FileSystemStorage", setting("storages")), "STATIC_ROOT": (str, None, setting("static-root")), "STATIC_URL": (str, "/static/", setting("static-url")), "TIME_ZONE": (str, "UTC", setting("std-setting-TIME_ZONE")), diff --git a/src/hope_dedup_engine/config/fragments/constance.py b/src/hope_dedup_engine/config/fragments/constance.py index e6b7146b..555dbc49 100644 --- a/src/hope_dedup_engine/config/fragments/constance.py +++ b/src/hope_dedup_engine/config/fragments/constance.py @@ -6,11 +6,7 @@ CONSTANCE_CONFIG = { "NEW_USER_IS_STAFF": (False, "Set any new user as staff", bool), - "NEW_USER_DEFAULT_GROUP": ( - DEFAULT_GROUP_NAME, - "Group to assign to any new user", - str, - ), + "NEW_USER_DEFAULT_GROUP": (DEFAULT_GROUP_NAME, "Group to assign to any new user", str), "DNN_BACKEND": ( cv2.dnn.DNN_BACKEND_OPENCV, "Specifies the computation backend to be used by OpenCV for deep learning inference.", diff --git a/src/hope_dedup_engine/config/fragments/csp.py b/src/hope_dedup_engine/config/fragments/csp.py index a0e02fbd..3070bdaa 100644 --- a/src/hope_dedup_engine/config/fragments/csp.py +++ b/src/hope_dedup_engine/config/fragments/csp.py @@ -1,27 +1,7 @@ # CSP_DEFAULT_SRC = ["'self'", "'unsafe-inline'", "'same-origin'", "fonts.googleapis.com", 'fonts.gstatic.com', 'data:', # 'blob:', "cdn.redoc.ly"] CSP_DEFAULT_SRC = ["'self'", "'unsafe-inline'"] -CSP_STYLE_SRC = [ - "'self'", - "'unsafe-inline'", - "same-origin", - "fonts.googleapis.com", - "fonts.gstatic.com", -] +CSP_STYLE_SRC = ["'self'", "'unsafe-inline'", "same-origin", "fonts.googleapis.com", "fonts.gstatic.com"] CSP_SCRIPT_SRC = ["'self'", "'unsafe-inline'", "same-origin", "blob:"] -CSP_IMG_SRC = [ - "'self'", - "'unsafe-inline'", - "same-origin", - "blob:", - "data:", - "cdn.redoc.ly", -] -CSP_FONT_SRC = [ - "'self'", - "fonts.googleapis.com", - "same-origin", - "fonts.googleapis.com", - "fonts.gstatic.com", - "blob:", -] +CSP_IMG_SRC = ["'self'", "'unsafe-inline'", "same-origin", "blob:", "data:", "cdn.redoc.ly"] +CSP_FONT_SRC = ["'self'", "fonts.googleapis.com", "same-origin", "fonts.googleapis.com", "fonts.gstatic.com", "blob:"] diff --git a/src/hope_dedup_engine/state.py b/src/hope_dedup_engine/state.py index 28253cbf..0973df8f 100644 --- a/src/hope_dedup_engine/state.py +++ b/src/hope_dedup_engine/state.py @@ -38,16 +38,7 @@ def add_cookie( samesite: str | None = None, ) -> None: value = json.dumps(value) - self.cookies[key] = [ - value, - max_age, - expires, - path, - domain, - secure, - httponly, - samesite, - ] + self.cookies[key] = [value, max_age, expires, path, domain, secure, httponly, samesite] def get_cookie(self, name: str) -> Optional[str]: return self.request.COOKIES.get(name) diff --git a/src/hope_dedup_engine/utils/http.py b/src/hope_dedup_engine/utils/http.py index 236b1a9b..f4300e8d 100644 --- a/src/hope_dedup_engine/utils/http.py +++ b/src/hope_dedup_engine/utils/http.py @@ -39,7 +39,5 @@ def absolute_uri(url: str | None = None) -> str: return uri -def absolute_reverse( - name: str, args: Tuple[Any] | None = None, kwargs: Dict[str, Any] | None = None -) -> str: +def absolute_reverse(name: str, args: Tuple[Any] | None = None, kwargs: Dict[str, Any] | None = None) -> str: return absolute_uri(reverse(name, args=args, kwargs=kwargs)) diff --git a/src/hope_dedup_engine/utils/security.py b/src/hope_dedup_engine/utils/security.py index 9ee29f33..4ed19bd0 100644 --- a/src/hope_dedup_engine/utils/security.py +++ b/src/hope_dedup_engine/utils/security.py @@ -4,7 +4,4 @@ def is_root(request: Any, *args: Any, **kwargs: Any) -> bool: - return ( - request.user.is_superuser - and request.headers.get(settings.ROOT_TOKEN_HEADER) == settings.ROOT_TOKEN != "" - ) + return request.user.is_superuser and request.headers.get(settings.ROOT_TOKEN_HEADER) == settings.ROOT_TOKEN != ""