From b9ab16d3ae266cf5449c80019ce5cd6012a247da Mon Sep 17 00:00:00 2001
From: Vitali Yanushchyk
Date: Thu, 30 May 2024 11:38:59 -0400
Subject: [PATCH] chg ! refactor DuplicationDetector, add NMS

---
 .../apps/faces/utils/duplication_detector.py | 61 +++++++++---
 .../config/fragments/constance.py            | 94 +++++++++++++++++--
 2 files changed, 138 insertions(+), 17 deletions(-)

diff --git a/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py b/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py
index a5fde88d..d7292267 100644
--- a/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py
+++ b/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py
@@ -15,9 +15,19 @@

 class DuplicationDetector:
+    """
+    A class to detect and process duplicate faces in images.
+    """
+
     def __init__(self, filename: str) -> None:
-        self.logger = logging.getLogger(__name__)
-        self.storages = {
+        """
+        Initialize the DuplicationDetector with the given filename.
+
+        Args:
+            filename (str): The filename of the image to process.
+        """
+        self.logger: logging.Logger = logging.getLogger(__name__)
+        self.storages: Dict[str, CV2DNNStorage | HDEAzureStorage | HOPEAzureStorage] = {
             "images": HOPEAzureStorage(),
             "cv2dnn": CV2DNNStorage(settings.CV2DNN_PATH),
             "encoded": HDEAzureStorage(),
@@ -28,12 +38,16 @@ def __init__(self, filename: str) -> None:
                 raise FileNotFoundError(f"File {file} does not exist in storage.")

         self.shape: Dict[str, int] = self._get_shape()
-        self.net = self._set_net(self.storages.get("cv2dnn"))
+        self.net: cv2.dnn_Net = self._set_net(self.storages.get("cv2dnn"))

         self.filename: str = filename
-        self.encodings_filename = f"{self.filename}.npy"
+        self.encodings_filename: str = f"{self.filename}.npy"
+        self.scale_factor: float = config.SCALE_FACTOR
+        self.mean_values: Tuple[float, float, float] = tuple(map(float, config.MEAN_VALUES.split(",")))
         self.face_detection_confidence: float = config.FACE_DETECTION_CONFIDENCE
+        self.face_detection_model: str = config.FACE_DETECTION_MODEL
         self.distance_threshold: float = config.DISTANCE_THRESHOLD
+        self.nms_threshold: float = config.NMS_THRESHOLD

     @property
     def has_encodings(self) -> bool:
@@ -66,22 +80,36 @@ def _get_face_detections_dnn(self) -> List[Tuple[int, int, int, int]]:
         try:
             with self.storages["images"].open(self.filename, "rb") as img_file:
                 img_array = np.frombuffer(img_file.read(), dtype=np.uint8)
+                # Decode the binary buffer into a 3D numpy array (height, width, channels in BGR color order)
                 image = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
             (h, w) = image.shape[:2]
+            # Create a blob (4D tensor) from the image
             blob = cv2.dnn.blobFromImage(
                 image=cv2.resize(image, dsize=(self.shape["height"], self.shape["width"])),
                 size=(self.shape["height"], self.shape["width"]),
-                scalefactor=1.0,
-                mean=(104.0, 177.0, 123.0),
+                scalefactor=self.scale_factor,
+                mean=self.mean_values,
             )
             self.net.setInput(blob)
+            # Forward pass returns an array with shape (1, 1, N, 7), where N is the number of
+            # detections and the 7 values are: image index (0), class label (0), confidence (0-1),
+            # and the normalized bounding box corners (x1, y1, x2, y2)
            detections = self.net.forward()
-            for i in range(0, detections.shape[2]):
+            boxes, confidences = [], []
+            for i in range(detections.shape[2]):
                 confidence = detections[0, 0, i, 2]
+                # Filter out weak detections by requiring at least the minimum confidence
                 if confidence > self.face_detection_confidence:
-                    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
-                    face_regions.append(tuple(box.astype("int").tolist()))
+                    box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype("int")
+                    boxes.append(box)
+                    confidences.append(float(confidence))
+            if boxes:
+                # Apply non-maximum suppression to discard weak, overlapping bounding boxes.
+                # NMSBoxes expects boxes as (x, y, width, height), while the detector yields
+                # corner coordinates, so convert before suppression.
+                nms_boxes = [[int(x1), int(y1), int(x2 - x1), int(y2 - y1)] for (x1, y1, x2, y2) in boxes]
+                indices = cv2.dnn.NMSBoxes(nms_boxes, confidences, self.face_detection_confidence, self.nms_threshold)
+                # NMSBoxes returns an empty sequence (not None) when nothing survives; flattening
+                # also handles the (N, 1)-shaped index arrays returned by older OpenCV versions
+                for i in np.array(indices).flatten():
+                    face_regions.append(tuple(boxes[i].tolist()))
         except Exception as e:
             self.logger.exception(f"Error processing face detection for image {self.filename}", exc_info=e)
             raise e
@@ -112,7 +140,10 @@ def _encode_face(self) -> None:
                 for region in face_regions:
                     if isinstance(region, (list, tuple)) and len(region) == 4:
                         top, right, bottom, left = region
-                        face_encodings = face_recognition.face_encodings(image, [(top, right, bottom, left)], model="hog")
+                        # Compute the face encodings for the face regions in the image
+                        face_encodings = face_recognition.face_encodings(
+                            image, [(top, right, bottom, left)], model=self.face_detection_model
+                        )
                         encodings.extend(face_encodings)
                     else:
                         self.logger.error(f"Invalid face region {region}")
@@ -123,11 +154,18 @@ def _encode_face(self) -> None:
             raise e

     def find_duplicates(self) -> Tuple[str]:
+        """
+        Find and return a tuple of duplicate images based on face encodings.
+
+        Returns:
+            Tuple[str]: A tuple of filenames of duplicate images.
+        """
         duplicated_images = set()
         path1 = self.filename
         try:
             if not self.has_encodings:
                 self._encode_face()
+
             encodings_all = self._load_encodings_all()
             encodings1 = encodings_all[path1]

@@ -141,6 +179,7 @@ def find_duplicates(self) -> Tuple[str]:
                             break
                     if path2 in duplicated_images:
                         break
+
             return tuple(duplicated_images)
         except Exception as e:
             self.logger.exception(f"Error finding duplicates for image {path1}", exc_info=e)
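As a quick sanity check of the NMS step introduced above, here is a minimal standalone sketch (not part of the patch). The box values are made up for illustration, and the thresholds mirror the FACE_DETECTION_CONFIDENCE and NMS_THRESHOLD defaults configured below; note that cv2.dnn.NMSBoxes takes boxes in (x, y, width, height) form.

import cv2
import numpy as np

# Three candidate detections: the first two overlap heavily, the third is distinct.
boxes = [[100, 100, 80, 80], [105, 98, 82, 78], [300, 200, 90, 90]]
confidences = [0.95, 0.80, 0.90]

indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.7, 0.4)
# The lower-confidence overlapping box is suppressed, leaving boxes 0 and 2.
for i in np.array(indices).flatten():
    print(boxes[i], confidences[i])

Raising the 0.4 IoU threshold would let more overlapping boxes survive; lowering it collapses near-duplicates more aggressively.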
diff --git a/src/hope_dedup_engine/config/fragments/constance.py b/src/hope_dedup_engine/config/fragments/constance.py
index 6ecec0b6..0fa1a04d 100644
--- a/src/hope_dedup_engine/config/fragments/constance.py
+++ b/src/hope_dedup_engine/config/fragments/constance.py
@@ -14,19 +14,89 @@
 CONSTANCE_CONFIG = {
     "NEW_USER_IS_STAFF": (False, "Set any new user as staff", bool),
     "NEW_USER_DEFAULT_GROUP": (DEFAULT_GROUP_NAME, "Group to assign to any new user", str),
-    "FACE_DETECTION_CONFIDENCE": (0.7, "Face detection confidence threshold", float),
-    "DISTANCE_THRESHOLD": (0.5, "Face distance threshold", float),
-    "DNN_BACKEND": (cv2.dnn.DNN_TARGET_CPU, "DNN backend", "dnn_backend"),
-    "DNN_TARGET": (cv2.dnn.DNN_TARGET_CPU, "DNN target", "dnn_target"),
+    "DNN_BACKEND": (
+        cv2.dnn.DNN_BACKEND_OPENCV,
+        "Specifies the computation backend to be used by OpenCV for deep learning inference.",
+        "dnn_backend",
+    ),
+    "DNN_TARGET": (
+        cv2.dnn.DNN_TARGET_CPU,
+        "Specifies the target device on which OpenCV will perform the deep learning computations.",
+        "dnn_target",
+    ),
+    "SCALE_FACTOR": (
+        1.0,
+        """Specifies the scaling factor applied to all pixel values when converting an image to a blob. It is
+        typically 1.0 for no scaling, or 1.0/255.0 to normalize pixel values to the [0, 1] range.
+        The mean values are subtracted before the scale factor is applied, and both must match the values
+        used during training to get correct results.
+        """,
+        float,
+    ),
+    "MEAN_VALUES": (
+        "104.0, 177.0, 123.0",
+        """Specifies the mean BGR values used in image preprocessing to normalize pixel values by subtracting
+        the mean values of the training dataset. This helps in reducing model bias and improving accuracy.
+        The specified mean values are subtracted from each channel (Blue, Green, Red) of the input image.
+        The mean values are subtracted before the scale factor is applied, and both must match the values
+        used during training to get correct results.
+        """,
+        "tuple_field",
+    ),
+    "FACE_DETECTION_CONFIDENCE": (
+        0.7,
+        """
+        Specifies the minimum confidence score required for a detected face to be considered valid. Detections
+        with confidence scores below this threshold are discarded as likely false positives.
+        """,
+        float,
+    ),
+    "NMS_THRESHOLD": (
+        0.4,
+        """
+        Specifies the Intersection over Union (IoU) threshold used in Non-Maximum Suppression (NMS) to filter out
+        overlapping bounding boxes. If the IoU between two boxes exceeds this threshold, the box with the lower
+        confidence score is suppressed. Lower values result in fewer, more distinct boxes; higher values allow more
+        overlapping boxes to remain.
+        """,
+        float,
+    ),
+    "DISTANCE_THRESHOLD": (
+        0.5,
+        """
+        Specifies the maximum allowable distance between two face embeddings for them to be considered a match.
+        It helps determine whether two faces belong to the same person by setting a threshold for similarity.
+        Lower values result in stricter matching, while higher values allow for more lenient matches.
+        """,
+        float,
+    ),
+    "FACE_DETECTION_MODEL": (
+        "hog",
+        """
+        Specifies the model type used for face detection: either the faster 'hog' (Histogram of Oriented
+        Gradients) or the more accurate 'cnn' (Convolutional Neural Network).
+        """,
+        "face_detection_model",
+    ),
 }

+
 CONSTANCE_CONFIG_FIELDSETS = {
     "User settings": {
         "fields": ("NEW_USER_IS_STAFF", "NEW_USER_DEFAULT_GROUP"),
         "collapse": False,
     },
     "Face recognition settings": {
-        "fields": ("FACE_DETECTION_CONFIDENCE", "DISTANCE_THRESHOLD", "DNN_BACKEND", "DNN_TARGET"),
+        "fields": (
+            "DNN_BACKEND",
+            "DNN_TARGET",
+            "SCALE_FACTOR",
+            "MEAN_VALUES",
+            "FACE_DETECTION_CONFIDENCE",
+            "NMS_THRESHOLD",
+            "DISTANCE_THRESHOLD",
+            "FACE_DETECTION_MODEL",
+        ),
         "collapse": False,
     },
 }
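Before the form-field changes below, a quick illustration of how SCALE_FACTOR and MEAN_VALUES are consumed. This sketch mirrors the blobFromImage call in _get_face_detections_dnn but is not part of the patch: the file name "face.jpg" and the 300x300 input shape are placeholder assumptions (the real shape comes from the model's prototxt via self.shape). OpenCV subtracts the mean from each BGR channel first and then multiplies by the scale factor.

import cv2

scale_factor = 1.0  # mirrors the SCALE_FACTOR default
mean_values = tuple(map(float, "104.0, 177.0, 123.0".split(",")))  # parsed as in DuplicationDetector.__init__

image = cv2.imread("face.jpg")  # BGR image; "face.jpg" is a placeholder path
blob = cv2.dnn.blobFromImage(
    image=cv2.resize(image, dsize=(300, 300)),
    size=(300, 300),
    scalefactor=scale_factor,
    mean=mean_values,
)
# blob holds (pixel - mean) * scale_factor values in an NCHW tensor
print(blob.shape)  # (1, 3, 300, 300)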
@@ -35,7 +105,7 @@
     "dnn_backend": [
         "django.forms.ChoiceField",
         {
-            "choices": ((cv2.dnn.DNN_TARGET_CPU, "DNN_TARGET_CPU"),),
+            "choices": ((cv2.dnn.DNN_BACKEND_OPENCV, "DNN_BACKEND_OPENCV"),),
         },
     ],
     "dnn_target": [
@@ -44,4 +114,16 @@
         "django.forms.ChoiceField",
         {
             "choices": ((cv2.dnn.DNN_TARGET_CPU, "DNN_TARGET_CPU"),),
         },
     ],
+    "face_detection_model": [
+        "django.forms.ChoiceField",
+        {
+            "choices": (("hog", "HOG"), ("cnn", "CNN")),
+        },
+    ],
+    "tuple_field": [
+        "django.forms.CharField",
+        {
+            "widget": "django.forms.TextInput",
+        },
+    ],
 }
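To round out the settings above, a hedged sketch of how DISTANCE_THRESHOLD drives the duplicate decision. It uses the public face_recognition API that the detector builds on; the exact comparison call inside find_duplicates is not shown in this patch, and the image filenames here are illustrative only.

import face_recognition

threshold = 0.5  # mirrors the DISTANCE_THRESHOLD default

enc_a = face_recognition.face_encodings(face_recognition.load_image_file("a.jpg"))[0]
enc_b = face_recognition.face_encodings(face_recognition.load_image_file("b.jpg"))[0]

# face_distance returns Euclidean distances between encodings; smaller means more similar
distance = face_recognition.face_distance([enc_a], enc_b)[0]
print("duplicate" if distance < threshold else "distinct")

Tightening the threshold (for example to 0.4) reduces false matches at the cost of missing some true duplicates; loosening it does the opposite.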