Skip to content

Commit

Permalink
chg ! config for deduplicationset
Browse files Browse the repository at this point in the history
  • Loading branch information
vitali-yanushchyk-valor committed Oct 31, 2024
1 parent 17dca4a commit 16aca37
Show file tree
Hide file tree
Showing 24 changed files with 930 additions and 571 deletions.
1,090 changes: 632 additions & 458 deletions pdm.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ dependencies = [
"flower>=2.0.1",
"setuptools>=74.1.2",
"django-smart-env>=0.1.0",
"jsonschema>=4.23.0",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,6 @@ markers =
python_files=test_*.py
filterwarnings =
ignore::DeprecationWarning
ignore::django.utils.deprecation.RemovedInDjango51Warning
ignore::django.utils.deprecation.RemovedInDjango60Warning
ignore::coverage.exceptions.CoverageWarning
ignore::coverage.exceptions.CoverageWarning:
2 changes: 1 addition & 1 deletion src/hope_dedup_engine/apps/api/admin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@

@register(Config)
class ConfigAdmin(ModelAdmin):
    """Admin registration for ``Config``.

    The change list shows each configuration's ``name`` next to its raw
    ``settings`` value.
    """

    # The placeholder ``pass`` is dropped: the class has a real body now.
    list_display = ("name", "settings")
1 change: 1 addition & 0 deletions src/hope_dedup_engine/apps/api/admin/deduplicationset.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class DeduplicationSetAdmin(AdminFiltersMixin, ExtraButtonsMixin, ModelAdmin):
"name",
"reference_pk",
"state_value",
"config",
"created_at",
"updated_at",
"deleted",
Expand Down
12 changes: 5 additions & 7 deletions src/hope_dedup_engine/apps/api/deduplication/adapters.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from collections.abc import Generator

from constance import config
from typing import Any

from hope_dedup_engine.apps.api.deduplication.registry import DuplicateKeyPair
from hope_dedup_engine.apps.api.models import DeduplicationSet
Expand All @@ -22,13 +21,12 @@ def run(self) -> Generator[DuplicateKeyPair, None, None]:
"reference_pk", "filename"
)
}
face_distance_threshold: float = (
self.deduplication_set.config
and self.deduplication_set.config.face_distance_threshold
) or config.FACE_DISTANCE_THRESHOLD
ds_config: dict[str, Any] = (
self.deduplication_set.config and self.deduplication_set.config.settings
) or {}
# ignored key pairs are not handled correctly in DuplicationDetector
detector = DuplicationDetector(
tuple[str](filename_to_reference_pk.keys()), face_distance_threshold
tuple[str](filename_to_reference_pk.keys()), ds_config
)
for first_filename, second_filename, distance in detector.find_duplicates():
yield filename_to_reference_pk[first_filename], filename_to_reference_pk[
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Generated by Django 5.0.7 on 2024-10-31 06:31

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    # Reshapes ``Config`` from a single typed column into a named record that
    # carries a JSON ``settings`` document, and relaxes the
    # ``DeduplicationSet.config`` relation to a plain foreign key.

    dependencies = [
        ("api", "0008_ignoredfilenamepair"),
    ]

    operations = [
        # NOTE(review): the column is dropped with no accompanying data
        # migration in this file, so previously stored thresholds are not
        # copied into ``settings`` -- confirm this is intended.
        migrations.RemoveField(
            model_name="config",
            name="face_distance_threshold",
        ),
        # Optional, unique, human-readable label for a configuration.
        migrations.AddField(
            model_name="config",
            name="name",
            field=models.CharField(
                blank=True, db_index=True, max_length=128, null=True, unique=True
            ),
        ),
        # JSON document holding the configuration; defaults to an empty dict
        # but may also be NULL.
        migrations.AddField(
            model_name="config",
            name="settings",
            field=models.JSONField(blank=True, default=dict, null=True),
        ),
        # A ForeignKey lets several deduplication sets point at the same
        # config row; deleting the config nulls the reference (SET_NULL).
        migrations.AlterField(
            model_name="deduplicationset",
            name="config",
            field=models.ForeignKey(
                null=True, on_delete=django.db.models.deletion.SET_NULL, to="api.config"
            ),
        ),
    ]
2 changes: 1 addition & 1 deletion src/hope_dedup_engine/apps/api/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from hope_dedup_engine.apps.api.models.auth import HDEToken # noqa: F401
from hope_dedup_engine.apps.api.models.config import Config # noqa: F401
from hope_dedup_engine.apps.api.models.deduplication import ( # noqa: F401
Config,
DeduplicationSet,
Duplicate,
Image,
Expand Down
25 changes: 25 additions & 0 deletions src/hope_dedup_engine/apps/api/models/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from django.core.exceptions import ValidationError
from django.db import models

from jsonschema import ValidationError as JSONSchemaValidationError

from hope_dedup_engine.apps.api.utils.config_schema import (
DefaultValidatingValidator,
settings_schema,
)


class Config(models.Model):
    """Named set of deduplication settings stored as a JSON document."""

    # Optional label; unique when present.
    name = models.CharField(
        max_length=128, unique=True, null=True, blank=True, db_index=True
    )
    # Free-form JSON validated against ``settings_schema`` in ``clean()``.
    settings = models.JSONField(default=dict, null=True, blank=True)

    def __str__(self) -> str:
        """Return the name when set, otherwise a label built from the pk."""
        return f"{self.name}" if self.name else f"ID: {self.pk}"

    def clean(self) -> None:
        """Validate ``settings`` against ``settings_schema``.

        Validation uses ``DefaultValidatingValidator``, which is built to
        fill schema defaults into the validated instance, so ``self.settings``
        may be modified in place by this call.

        Raises:
            ValidationError: keyed on ``settings`` with the JSON Schema
                error message when the document does not match the schema.
        """
        try:
            DefaultValidatingValidator(settings_schema).validate(self.settings)
        except JSONSchemaValidationError as e:
            # Chain the original error so the failing schema path stays
            # available in tracebacks and logs.
            raise ValidationError({"settings": e.message}) from e
17 changes: 1 addition & 16 deletions src/hope_dedup_engine/apps/api/models/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from uuid import uuid4

from django.conf import settings
from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models

from hope_dedup_engine.apps.api.utils.notification import send_notification
Expand All @@ -11,20 +10,6 @@
REFERENCE_PK_LENGTH: Final[int] = 100


class Config(models.Model):
face_distance_threshold = models.FloatField(
null=True,
validators=[MinValueValidator(0.1), MaxValueValidator(1.0)],
)

def __str__(self) -> str:
return f"{self.pk}: " + " | ".join(
f"{field.name}: {getattr(self, field.name)}"
for field in self._meta.fields
if field.name not in ("id",)
)


class DeduplicationSet(models.Model):
"""
Bucket for entries we want to deduplicate
Expand Down Expand Up @@ -69,7 +54,7 @@ class State(models.IntegerChoices):
)
updated_at = models.DateTimeField(auto_now=True)
notification_url = models.CharField(max_length=255, null=True, blank=True)
config = models.OneToOneField(Config, null=True, on_delete=models.SET_NULL)
config = models.ForeignKey("Config", null=True, on_delete=models.SET_NULL)

@property
def state(self) -> State:
Expand Down
24 changes: 13 additions & 11 deletions src/hope_dedup_engine/apps/api/serializers.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,32 @@
from typing import Any

from jsonschema import Draft202012Validator
from jsonschema import ValidationError as JSONSchemaValidationError
from rest_framework import serializers

from hope_dedup_engine.apps.api.models import DeduplicationSet
from hope_dedup_engine.apps.api.models import Config, DeduplicationSet
from hope_dedup_engine.apps.api.models.deduplication import (
Config,
Duplicate,
IgnoredFilenamePair,
IgnoredReferencePkPair,
Image,
)

CONFIG = "config"
from hope_dedup_engine.apps.api.utils.config_schema import settings_schema


class ConfigSerializer(serializers.ModelSerializer):
    """Serializer for ``Config`` that exposes every field except ``id``."""

    class Meta:
        model = Config
        exclude = ("id",)

    def validate_settings(self, value: Any) -> Any:
        """Check ``settings`` against ``settings_schema``.

        Uses the plain Draft 2020-12 validator, so the value is only
        checked and is returned unchanged.

        Raises:
            serializers.ValidationError: when ``value`` violates the schema.
        """
        validator = Draft202012Validator(settings_schema)
        try:
            validator.validate(value)
        except JSONSchemaValidationError as e:
            # Chain the cause so the original schema error is not lost.
            raise serializers.ValidationError(
                f"Settings validation error: {e.message}"
            ) from e
        return value


class DeduplicationSetSerializer(serializers.ModelSerializer):
state = serializers.CharField(source="get_state_value_display", read_only=True)
Expand All @@ -36,22 +44,16 @@ class Meta:
"updated_by",
)

def create(self, validated_data) -> DeduplicationSet:
config_data = validated_data.get(CONFIG) and validated_data.pop(CONFIG)
config = Config.objects.create(**config_data) if config_data else None
return DeduplicationSet.objects.create(config=config, **validated_data)


class CreateConfigSerializer(ConfigSerializer):
pass


class CreateDeduplicationSetSerializer(serializers.ModelSerializer):
config = CreateConfigSerializer(required=False)

class Meta:
model = DeduplicationSet
fields = ("config", "reference_pk", "notification_url")
fields = ("reference_pk", "notification_url")


class ImageSerializer(serializers.ModelSerializer):
Expand Down
97 changes: 97 additions & 0 deletions src/hope_dedup_engine/apps/api/utils/config_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from django.conf import settings

from constance import config
from jsonschema import Draft202012Validator, validators

# JSON Schema for ``Config.settings``.
#
# String defaults of the form "constance.config.<NAME>" are placeholders:
# ``extend_with_default`` below swaps them for the live constance value when
# it fills defaults into an instance.
settings_schema: dict = {
    "type": "object",
    "properties": {
        "detection": {
            "type": "object",
            "properties": {
                "confidence": {
                    "type": "number",
                    "exclusiveMinimum": 0,
                    "maximum": 1.0,
                    "default": "constance.config.FACE_DETECTION_CONFIDENCE",
                },
            },
            "default": {},
        },
        "recognition": {
            "type": "object",
            "properties": {
                "num_jitters": {
                    "type": "integer",
                    "minimum": 1,
                    "default": "constance.config.FACE_ENCODINGS_NUM_JITTERS",
                },
                "model": {
                    "type": "string",
                    # Allowed values mirror the choices declared for the
                    # "face_encodings_model" constance field.
                    "enum": tuple(
                        ch[0]
                        for ch in settings.CONSTANCE_ADDITIONAL_FIELDS.get(
                            "face_encodings_model"
                        )[1].get("choices")
                    ),
                    "default": "constance.config.FACE_ENCODINGS_MODEL",
                },
                "preprocessors": {
                    # The key must be the string "type"; the bare builtin
                    # ``type`` is not a schema keyword and left this
                    # constraint unenforced.
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["contrast"],
                    },
                    "uniqueItems": True,
                    "default": [],
                },
            },
            "default": {},
        },
        "duplicates": {
            "type": "object",
            "properties": {
                "tolerance": {
                    "type": "number",
                    "exclusiveMinimum": 0,
                    "maximum": 1.0,
                    "default": "constance.config.FACE_DISTANCE_THRESHOLD",
                },
            },
            "default": {},
        },
    },
}


def extend_with_default(validator_class):
    """Build a validator class that fills in schema defaults while validating.

    The returned class behaves like ``validator_class`` except that its
    ``properties`` keyword first inserts each missing property's ``default``
    into the instance and then runs the normal ``properties`` validation.

    A string default starting with ``"constance.config."`` is treated as a
    placeholder and replaced by the attribute of that name on
    ``constance.config``.

    Args:
        validator_class: a jsonschema validator class to extend.

    Returns:
        A new validator class created with ``validators.extend``.
    """
    # Local import keeps this block self-contained.
    import copy

    validate_properties = validator_class.VALIDATORS["properties"]
    constance_prefix = "constance.config."

    def set_defaults(validator, properties, instance, schema):
        """Insert missing defaults into ``instance``, then validate it."""
        # Only JSON objects can receive properties; any other instance is
        # left for the regular validators to reject.
        if validator.is_type(instance, "object"):
            for name, subschema in properties.items():
                # Boolean subschemas carry no default.
                if not isinstance(subschema, dict) or "default" not in subschema:
                    continue
                # Skip the constance lookup when the caller supplied a value.
                if name in instance:
                    continue

                default_value = subschema["default"]
                if isinstance(default_value, str) and default_value.startswith(
                    constance_prefix
                ):
                    config_name = default_value.split(".")[-1]
                    default_value = getattr(config, config_name)

                # Deep-copy so the instance never shares a mutable default
                # ({} or []) with the schema. Without the copy, nested
                # default-filling writes into the schema itself and later
                # validations reuse stale values.
                instance[name] = copy.deepcopy(default_value)

        yield from validate_properties(
            validator,
            properties,
            instance,
            schema,
        )

    return validators.extend(
        validator_class,
        {"properties": set_defaults},
    )


# Draft 2020-12 validator that also populates defaults from the schema.
DefaultValidatingValidator = extend_with_default(Draft202012Validator)
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def handle(self, *args: Any, **options: dict[str, Any]) -> None:
self.halt(FileNotFoundError(MESSAGES["not_exist"] % storage.src))
self.stdout.write(MESSAGES["storage_success"] % storage.name)
logger.info(MESSAGES["storage_success"] % storage.name)
except (CommandError, SystemCheckError) as e:
except (CommandError, FileNotFoundError, SystemCheckError) as e:
self.stdout.write(
self.style.ERROR(MESSAGES["failed"] % (storage.name, e))
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,23 @@ class DuplicationDetector:
def __init__(
self,
filenames: tuple[str],
face_distance_threshold: float,
ds_config: dict[str, Any] = None,
ignore_pairs: tuple[tuple[str, str], ...] = (),
) -> None:
"""
Initialize the DuplicationDetector with the given filenames and ignore pairs.
Args:
filenames (tuple[str]): The filenames of the images to process.
ds_config (dict[str, Any], optional): The configuration settings for the deduplication set.
ignore_pairs (tuple[tuple[str, str]], optional):
The pairs of filenames to ignore. Defaults to an empty tuple.
"""
self.filenames = filenames
self.face_distance_threshold = face_distance_threshold
self.face_distance_threshold = ds_config.get("duplicates").get("tolerance")
self.ignore_set = IgnorePairsValidator.validate(ignore_pairs)
self.storages = StorageManager()
self.image_processor = ImageProcessor(face_distance_threshold)
self.image_processor = ImageProcessor(ds_config)

def _encodings_filename(self, filename: str) -> str:
"""
Expand Down
15 changes: 10 additions & 5 deletions src/hope_dedup_engine/apps/faces/services/image_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ class ImageProcessor:

logger: logging.Logger = logging.getLogger(__name__)

def __init__(self, face_distance_threshold: float) -> None:
def __init__(
self,
ds_config: dict[str, Any] = None,
) -> None:
"""
Initialize the ImageProcessor with the required configurations.
"""
Expand All @@ -72,11 +75,13 @@ def __init__(self, face_distance_threshold: float) -> None:
),
)
self.face_encodings_cfg = FaceEncodingsConfig(
num_jitters=config.FACE_ENCODINGS_NUM_JITTERS,
model=config.FACE_ENCODINGS_MODEL,
num_jitters=ds_config.get("recognition").get("num_jitters"),
model=ds_config.get("recognition").get("model"),
)
self.face_detection_confidence: float = config.FACE_DETECTION_CONFIDENCE
self.distance_threshold: float = face_distance_threshold
self.face_detection_confidence: float = ds_config.get("detection").get(
"confidence"
)
self.distance_threshold: float = ds_config.get("duplicates").get("tolerance")
self.nms_threshold: float = config.NMS_THRESHOLD

def _get_face_detections_dnn(
Expand Down
Loading

0 comments on commit 16aca37

Please sign in to comment.