From b9ab16d3ae266cf5449c80019ce5cd6012a247da Mon Sep 17 00:00:00 2001
From: Vitali Yanushchyk
Date: Thu, 30 May 2024 11:38:59 -0400
Subject: [PATCH] chg ! refactor DuplicationDetector, add NMS

---
 .../apps/faces/utils/duplication_detector.py | 61 +++++++++---
 .../config/fragments/constance.py            | 94 +++++++++++++++++--
 2 files changed, 138 insertions(+), 17 deletions(-)

diff --git a/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py b/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py
index a5fde88d..d7292267 100644
--- a/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py
+++ b/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py
@@ -15,9 +15,19 @@

 class DuplicationDetector:
+    """
+    A class to detect and process duplicate faces in images.
+    """
+
     def __init__(self, filename: str) -> None:
-        self.logger = logging.getLogger(__name__)
-        self.storages = {
+        """
+        Initialize the DuplicationDetector with the given filename.
+
+        Args:
+            filename (str): The filename of the image to process.
+        """
+        self.logger: logging.Logger = logging.getLogger(__name__)
+        self.storages: Dict[str, CV2DNNStorage | HDEAzureStorage | HOPEAzureStorage] = {
             "images": HOPEAzureStorage(),
             "cv2dnn": CV2DNNStorage(settings.CV2DNN_PATH),
             "encoded": HDEAzureStorage(),
@@ -28,12 +38,16 @@ def __init__(self, filename: str) -> None:
                 raise FileNotFoundError(f"File {file} does not exist in storage.")

         self.shape: Dict[str, int] = self._get_shape()
-        self.net = self._set_net(self.storages.get("cv2dnn"))
+        self.net: cv2.dnn_Net = self._set_net(self.storages.get("cv2dnn"))

         self.filename: str = filename
-        self.encodings_filename = f"{self.filename}.npy"
+        self.encodings_filename: str = f"{self.filename}.npy"
+        self.scale_factor: float = config.SCALE_FACTOR
+        self.mean_values: Tuple[float, float, float] = tuple(map(float, config.MEAN_VALUES.split(",")))
         self.face_detection_confidence: float = config.FACE_DETECTION_CONFIDENCE
+        self.face_detection_model: str = config.FACE_DETECTION_MODEL
         self.distance_threshold: float = config.DISTANCE_THRESHOLD
+        self.nms_threshold: float = config.NMS_THRESHOLD

     @property
     def has_encodings(self) -> bool:
@@ -66,22 +80,36 @@ def _get_face_detections_dnn(self) -> List[Tuple[int, int, int, int]]:
         try:
             with self.storages["images"].open(self.filename, "rb") as img_file:
                 img_array = np.frombuffer(img_file.read(), dtype=np.uint8)
+                # Decode the binary buffer into a 3D numpy array (height, width, channels in BGR color order)
                 image = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
             (h, w) = image.shape[:2]
+            # Create a blob (4D tensor) from the image
             blob = cv2.dnn.blobFromImage(
                 image=cv2.resize(image, dsize=(self.shape["height"], self.shape["width"])),
                 size=(self.shape["height"], self.shape["width"]),
-                scalefactor=1.0,
-                mean=(104.0, 177.0, 123.0),
+                scalefactor=self.scale_factor,
+                mean=self.mean_values,
             )
             self.net.setInput(blob)
+            # Forward pass returns an array with shape (1, 1, N, 7), where N is the number of
+            # detections and the 7 values are: image index (0), class label (0), confidence (0-1),
+            # and the normalized bounding box corners (x1, y1, x2, y2)
            detections = self.net.forward()
-            for i in range(0, detections.shape[2]):
+            boxes, confidences = [], []
+            for i in range(detections.shape[2]):
                 confidence = detections[0, 0, i, 2]
+                # Filter out weak detections by requiring at least the minimum confidence
                 if confidence > self.face_detection_confidence:
-                    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
-                    face_regions.append(tuple(box.astype("int").tolist()))
+                    box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype("int")
+                    boxes.append(box)
+                    confidences.append(float(confidence))
+            if boxes:
+                # Apply non-maximum suppression to discard weak, overlapping bounding boxes.
+                # NMSBoxes expects boxes as (x, y, width, height), while the detector yields
+                # corner coordinates, so convert before suppression.
+                nms_boxes = [[int(x1), int(y1), int(x2 - x1), int(y2 - y1)] for (x1, y1, x2, y2) in boxes]
+                indices = cv2.dnn.NMSBoxes(nms_boxes, confidences, self.face_detection_confidence, self.nms_threshold)
+                # NMSBoxes returns an empty sequence (not None) when nothing survives; flattening
+                # also handles the (N, 1)-shaped index arrays returned by older OpenCV versions
+                for i in np.array(indices).flatten():
+                    face_regions.append(tuple(boxes[i].tolist()))
         except Exception as e:
             self.logger.exception(f"Error processing face detection for image {self.filename}", exc_info=e)
             raise e
@@ -112,7 +140,10 @@ def _encode_face(self) -> None:
                 for region in face_regions:
                     if isinstance(region, (list, tuple)) and len(region) == 4:
                         top, right, bottom, left = region
-                        face_encodings = face_recognition.face_encodings(image, [(top, right, bottom, left)], model="hog")
+                        # Compute the face encodings for the face regions in the image
+                        face_encodings = face_recognition.face_encodings(
+                            image, [(top, right, bottom, left)], model=self.face_detection_model
+                        )
                         encodings.extend(face_encodings)
                     else:
                         self.logger.error(f"Invalid face region {region}")
@@ -123,11 +154,18 @@ def _encode_face(self) -> None:
             raise e

     def find_duplicates(self) -> Tuple[str]:
+        """
+        Find and return a tuple of duplicate images based on face encodings.
+
+        Returns:
+            Tuple[str]: A tuple of filenames of duplicate images.
+        """
         duplicated_images = set()
         path1 = self.filename
         try:
             if not self.has_encodings:
                 self._encode_face()
+
             encodings_all = self._load_encodings_all()
             encodings1 = encodings_all[path1]

@@ -141,6 +179,7 @@ def find_duplicates(self) -> Tuple[str]:
                             break
                     if path2 in duplicated_images:
                         break
+
             return tuple(duplicated_images)
         except Exception as e:
             self.logger.exception(f"Error finding duplicates for image {path1}", exc_info=e)
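As a quick sanity check of the NMS step introduced above, here is a minimal standalone sketch (not part of the patch). The box values are made up for illustration, and the thresholds mirror the FACE_DETECTION_CONFIDENCE and NMS_THRESHOLD defaults configured below; note that cv2.dnn.NMSBoxes takes boxes in (x, y, width, height) form.

import cv2
import numpy as np

# Three candidate detections: the first two overlap heavily, the third is distinct.
boxes = [[100, 100, 80, 80], [105, 98, 82, 78], [300, 200, 90, 90]]
confidences = [0.95, 0.80, 0.90]

indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.7, 0.4)
# The lower-confidence overlapping box is suppressed, leaving boxes 0 and 2.
for i in np.array(indices).flatten():
    print(boxes[i], confidences[i])

Raising the 0.4 IoU threshold would let more overlapping boxes survive; lowering it collapses near-duplicates more aggressively.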
diff --git a/src/hope_dedup_engine/config/fragments/constance.py b/src/hope_dedup_engine/config/fragments/constance.py
index 6ecec0b6..0fa1a04d 100644
--- a/src/hope_dedup_engine/config/fragments/constance.py
+++ b/src/hope_dedup_engine/config/fragments/constance.py
@@ -14,19 +14,89 @@
 CONSTANCE_CONFIG = {
     "NEW_USER_IS_STAFF": (False, "Set any new user as staff", bool),
     "NEW_USER_DEFAULT_GROUP": (DEFAULT_GROUP_NAME, "Group to assign to any new user", str),
-    "FACE_DETECTION_CONFIDENCE": (0.7, "Face detection confidence threshold", float),
-    "DISTANCE_THRESHOLD": (0.5, "Face distance threshold", float),
-    "DNN_BACKEND": (cv2.dnn.DNN_TARGET_CPU, "DNN backend", "dnn_backend"),
-    "DNN_TARGET": (cv2.dnn.DNN_TARGET_CPU, "DNN target", "dnn_target"),
+    "DNN_BACKEND": (
+        cv2.dnn.DNN_BACKEND_OPENCV,
+        "Specifies the computation backend to be used by OpenCV for deep learning inference.",
+        "dnn_backend",
+    ),
+    "DNN_TARGET": (
+        cv2.dnn.DNN_TARGET_CPU,
+        "Specifies the target device on which OpenCV will perform the deep learning computations.",
+        "dnn_target",
+    ),
+    "SCALE_FACTOR": (
+        1.0,
+        """Specifies the scaling factor applied to all pixel values when converting an image to a blob. It is
+        typically 1.0 for no scaling, or 1.0/255.0 to normalize pixel values to the [0, 1] range.
+        The mean values are subtracted before the scale factor is applied, and both must match the values
+        used during training to get correct results.
+        """,
+        float,
+    ),
+    "MEAN_VALUES": (
+        "104.0, 177.0, 123.0",
+        """Specifies the mean BGR values used in image preprocessing to normalize pixel values by subtracting
+        the mean values of the training dataset. This helps in reducing model bias and improving accuracy.
+        The specified mean values are subtracted from each channel (Blue, Green, Red) of the input image.
+        The mean values are subtracted before the scale factor is applied, and both must match the values
+        used during training to get correct results.
+        """,
+        "tuple_field",
+    ),
+    "FACE_DETECTION_CONFIDENCE": (
+        0.7,
+        """
+        Specifies the minimum confidence score required for a detected face to be considered valid. Detections
+        with confidence scores below this threshold are discarded as likely false positives.
+        """,
+        float,
+    ),
+    "NMS_THRESHOLD": (
+        0.4,
+        """
+        Specifies the Intersection over Union (IoU) threshold used in Non-Maximum Suppression (NMS) to filter out
+        overlapping bounding boxes. If the IoU between two boxes exceeds this threshold, the box with the lower
+        confidence score is suppressed. Lower values result in fewer, more distinct boxes; higher values allow more
+        overlapping boxes to remain.
+        """,
+        float,
+    ),
+    "DISTANCE_THRESHOLD": (
+        0.5,
+        """
+        Specifies the maximum allowable distance between two face embeddings for them to be considered a match.
+        It helps determine whether two faces belong to the same person by setting a threshold for similarity.
+        Lower values result in stricter matching, while higher values allow for more lenient matches.
+        """,
+        float,
+    ),
+    "FACE_DETECTION_MODEL": (
+        "hog",
+        """
+        Specifies the model type used for face detection: either the faster 'hog' (Histogram of Oriented
+        Gradients) or the more accurate 'cnn' (Convolutional Neural Network).
+        """,
+        "face_detection_model",
+    ),
 }

+
 CONSTANCE_CONFIG_FIELDSETS = {
     "User settings": {
         "fields": ("NEW_USER_IS_STAFF", "NEW_USER_DEFAULT_GROUP"),
         "collapse": False,
     },
     "Face recognition settings": {
-        "fields": ("FACE_DETECTION_CONFIDENCE", "DISTANCE_THRESHOLD", "DNN_BACKEND", "DNN_TARGET"),
+        "fields": (
+            "DNN_BACKEND",
+            "DNN_TARGET",
+            "SCALE_FACTOR",
+            "MEAN_VALUES",
+            "FACE_DETECTION_CONFIDENCE",
+            "NMS_THRESHOLD",
+            "DISTANCE_THRESHOLD",
+            "FACE_DETECTION_MODEL",
+        ),
         "collapse": False,
     },
 }
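Before the form-field changes below, a quick illustration of how SCALE_FACTOR and MEAN_VALUES are consumed. This sketch mirrors the blobFromImage call in _get_face_detections_dnn but is not part of the patch: the file name "face.jpg" and the 300x300 input shape are placeholder assumptions (the real shape comes from the model's prototxt via self.shape). OpenCV subtracts the mean from each BGR channel first and then multiplies by the scale factor.

import cv2

scale_factor = 1.0  # mirrors the SCALE_FACTOR default
mean_values = tuple(map(float, "104.0, 177.0, 123.0".split(",")))  # parsed as in DuplicationDetector.__init__

image = cv2.imread("face.jpg")  # BGR image; "face.jpg" is a placeholder path
blob = cv2.dnn.blobFromImage(
    image=cv2.resize(image, dsize=(300, 300)),
    size=(300, 300),
    scalefactor=scale_factor,
    mean=mean_values,
)
# blob holds (pixel - mean) * scale_factor values in an NCHW tensor
print(blob.shape)  # (1, 3, 300, 300)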
@@ -35,7 +105,7 @@
     "dnn_backend": [
         "django.forms.ChoiceField",
         {
-            "choices": ((cv2.dnn.DNN_TARGET_CPU, "DNN_TARGET_CPU"),),
+            "choices": ((cv2.dnn.DNN_BACKEND_OPENCV, "DNN_BACKEND_OPENCV"),),
         },
     ],
     "dnn_target": [
@@ -44,4 +114,16 @@
         "django.forms.ChoiceField",
         {
             "choices": ((cv2.dnn.DNN_TARGET_CPU, "DNN_TARGET_CPU"),),
         },
     ],
+    "face_detection_model": [
+        "django.forms.ChoiceField",
+        {
+            "choices": (("hog", "HOG"), ("cnn", "CNN")),
+        },
+    ],
+    "tuple_field": [
+        "django.forms.CharField",
+        {
+            "widget": "django.forms.TextInput",
+        },
+    ],
 }
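To round out the settings above, a hedged sketch of how DISTANCE_THRESHOLD drives the duplicate decision. It uses the public face_recognition API that the detector builds on; the exact comparison call inside find_duplicates is not shown in this patch, and the image filenames here are illustrative only.

import face_recognition

threshold = 0.5  # mirrors the DISTANCE_THRESHOLD default

enc_a = face_recognition.face_encodings(face_recognition.load_image_file("a.jpg"))[0]
enc_b = face_recognition.face_encodings(face_recognition.load_image_file("b.jpg"))[0]

# face_distance returns Euclidean distances between encodings; smaller means more similar
distance = face_recognition.face_distance([enc_a], enc_b)[0]
print("duplicate" if distance < threshold else "distinct")

Tightening the threshold (for example to 0.4) reduces false matches at the cost of missing some true duplicates; loosening it does the opposite.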