Testing bounding boxes in WithMixedPrecisionTest (keras-team#1860)
* added bounding boxes and fixed test configs

* added a batch axis

* Fix a few of the layers

* fixed one more test. 12 to go

* added flags for tests... 10F at the moment

* fixed 3 more tests... 7F at the moment

* fix for cutMix

* all tests fixed

* styling fixes

* linting

* linting again

* add string dtypes for keras core

* add condition for no_bbox_tests

* trying to make CI happy

* fix styling issues

* remove duplicate tests

* remove check inside nogpu

* fix cutmix

---------

Co-authored-by: ianjjohnson <[email protected]>
jaygala223 and ianstenbit authored Jul 31, 2023
1 parent 6005ff8 commit c8a80d4
Showing 3 changed files with 123 additions and 27 deletions.
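For context, the sketch below shows the kind of scenario these changes are meant to support: calling a KerasCV preprocessing layer on images plus bounding boxes while a global mixed_float16 policy is active. The specific layer and box values are illustrative and not taken from the diff.

```python
import tensorflow as tf
from tensorflow import keras

import keras_cv

# Under mixed_float16, layer variables stay float32 while compute_dtype
# becomes float16, which is what the updated test exercises.
keras.mixed_precision.set_global_policy("mixed_float16")

images = tf.random.uniform((3, 512, 512, 3), minval=0, maxval=255, dtype=tf.float32)
bounding_boxes = {
    # One xyxy box per image, batch of 3.
    "boxes": tf.constant([[[200.0, 200.0, 400.0, 400.0]]] * 3),
    "classes": tf.ones((3, 1), dtype=tf.float32),
}

layer = keras_cv.layers.RandomCrop(
    height=224, width=224, bounding_box_format="xyxy"
)
outputs = layer({"images": images, "bounding_boxes": bounding_boxes})
```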
@@ -484,7 +484,9 @@ def _augment(self, inputs):
image=raw_image,
)

bounding_boxes = bounding_box.to_ragged(bounding_boxes)
bounding_boxes = bounding_box.to_ragged(
bounding_boxes, dtype=self.compute_dtype
)
result[BOUNDING_BOXES] = bounding_boxes

if keypoints is not None:
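The hunk above passes the layer's compute_dtype through to bounding_box.to_ragged; under mixed_float16 that dtype is float16, whereas the utility would otherwise default to float32 and the ragged boxes would stop matching the rest of the pipeline. A minimal sketch of the call (the -1 sentinel for padded boxes follows KerasCV's usual convention and is assumed here):

```python
import tensorflow as tf
from keras_cv import bounding_box

# Dense, padded boxes: the second box/class of -1 is padding.
dense = {
    "boxes": tf.constant([[[10.0, 10.0, 50.0, 50.0],
                           [-1.0, -1.0, -1.0, -1.0]]]),
    "classes": tf.constant([[0.0, -1.0]]),
}

# Convert to ragged while keeping the boxes in the layer's compute dtype.
ragged = bounding_box.to_ragged(dense, dtype=tf.float16)
```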
39 changes: 25 additions & 14 deletions keras_cv/layers/preprocessing/random_crop.py
@@ -78,11 +78,17 @@ def compute_ragged_image_signature(self, images):
return ragged_spec

def get_random_transformation_batch(self, batch_size, **kwargs):
tops = self._random_generator.random_uniform(
shape=(batch_size, 1), minval=0, maxval=1, dtype=tf.float32
tops = tf.cast(
self._random_generator.random_uniform(
shape=(batch_size, 1), minval=0, maxval=1
),
self.compute_dtype,
)
lefts = self._random_generator.random_uniform(
shape=(batch_size, 1), minval=0, maxval=1, dtype=tf.float32
lefts = tf.cast(
self._random_generator.random_uniform(
shape=(batch_size, 1), minval=0, maxval=1
),
self.compute_dtype,
)
return {"tops": tops, "lefts": lefts}

@@ -212,13 +218,14 @@ def _get_image_shape(self, images):
def _crop_images(self, images, transformations):
batch_size = tf.shape(images)[0]
heights, widths = self._get_image_shape(images)
heights = tf.cast(heights, dtype=tf.float32)
widths = tf.cast(widths, dtype=tf.float32)
heights = tf.cast(heights, dtype=self.compute_dtype)
widths = tf.cast(widths, dtype=self.compute_dtype)

tops = transformations["tops"]
lefts = transformations["lefts"]
x1s = lefts * (widths - self.width)
y1s = tops * (heights - self.height)

x2s = x1s + self.width
y2s = y1s + self.height
# normalize
@@ -229,8 +236,8 @@ def _crop_images(self, images, transformations):
boxes = tf.concat([y1s, x1s, y2s, x2s], axis=-1)

images = tf.image.crop_and_resize(
images,
boxes,
tf.cast(images, tf.float32),
tf.cast(boxes, tf.float32),
tf.range(batch_size),
[self.height, self.width],
method="nearest",
@@ -246,8 +253,8 @@ def _crop_bounding_boxes(self, images, boxes, transformation):
tops = transformation["tops"]
lefts = transformation["lefts"]
heights, widths = self._get_image_shape(images)
heights = tf.cast(heights, dtype=tf.float32)
widths = tf.cast(widths, dtype=tf.float32)
heights = tf.cast(heights, dtype=self.compute_dtype)
widths = tf.cast(widths, dtype=self.compute_dtype)

# compute offsets for xyxy bounding_boxes
top_offsets = tf.cast(
@@ -259,7 +266,9 @@ def _crop_bounding_boxes(self, images, boxes, transformation):
dtype=self.compute_dtype,
)

x1s, y1s, x2s, y2s = tf.split(boxes, 4, axis=-1)
x1s, y1s, x2s, y2s = tf.split(
tf.cast(boxes, self.compute_dtype), 4, axis=-1
)
x1s -= tf.expand_dims(left_offsets, axis=1)
y1s -= tf.expand_dims(top_offsets, axis=1)
x2s -= tf.expand_dims(left_offsets, axis=1)
@@ -269,11 +278,13 @@ def _crop_bounding_boxes(self, images, boxes, transformation):

def _resize_bounding_boxes(self, images, boxes):
heights, widths = self._get_image_shape(images)
heights = tf.cast(heights, dtype=tf.float32)
widths = tf.cast(widths, dtype=tf.float32)
heights = tf.cast(heights, dtype=self.compute_dtype)
widths = tf.cast(widths, dtype=self.compute_dtype)
x_scale = tf.cast(self.width / widths, dtype=self.compute_dtype)
y_scale = tf.cast(self.height / heights, dtype=self.compute_dtype)
x1s, y1s, x2s, y2s = tf.split(boxes, 4, axis=-1)
x1s, y1s, x2s, y2s = tf.split(
tf.cast(boxes, self.compute_dtype), 4, axis=-1
)
outputs = tf.concat(
[
x1s * x_scale[:, tf.newaxis, :],
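The remaining random_crop.py hunks keep the bounding boxes in step with the image: a crop shifts the boxes by the crop origin, and a resize scales them toward the target size. The standalone sketch below combines both steps for xyxy boxes purely as an illustration; the layer itself applies them in separate code paths.

```python
import tensorflow as tf

def shift_and_scale_xyxy(boxes, top_offset, left_offset,
                         source_height, source_width,
                         target_height, target_width):
    """Illustrative xyxy update for a crop (shift) followed by a resize (scale)."""
    x1s, y1s, x2s, y2s = tf.split(boxes, 4, axis=-1)
    # 1) Translate into the cropped image's coordinate frame.
    x1s, x2s = x1s - left_offset, x2s - left_offset
    y1s, y2s = y1s - top_offset, y2s - top_offset
    # 2) Scale into the resized image's coordinate frame.
    x_scale = target_width / source_width
    y_scale = target_height / source_height
    return tf.concat([x1s * x_scale, y1s * y_scale,
                      x2s * x_scale, y2s * y_scale], axis=-1)
```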
107 changes: 95 additions & 12 deletions keras_cv/layers/preprocessing/with_mixed_precision_test.py
@@ -30,6 +30,8 @@
"target_size": (224, 224),
"crop_area_factor": (0.8, 1.0),
"aspect_ratio_factor": (3 / 4, 4 / 3),
"bounding_box_format": "xywh",
"dtype": "float32",
},
),
("Grayscale", layers.Grayscale, {}),
@@ -50,7 +52,11 @@
layers.RandomCutout,
{"height_factor": 0.2, "width_factor": 0.2},
),
("RandomFlip", layers.RandomFlip, {"mode": "horizontal"}),
(
"RandomFlip",
layers.RandomFlip,
{"mode": "horizontal", "bounding_box_format": "xyxy"},
),
(
"RandomHue",
layers.RandomHue,
@@ -59,7 +65,12 @@
(
"RandomTranslation",
layers.RandomTranslation,
{"width_factor": 0.5, "height_factor": 0.5},
{
"width_factor": 0.5,
"height_factor": 0.5,
"bounding_box_format": "xyxy",
"dtype": "float32",
},
),
(
"RandomChannelShift",
@@ -86,28 +97,64 @@
(
"RandomGaussianBlur",
layers.RandomGaussianBlur,
{"kernel_size": 3, "factor": (0.0, 3.0)},
{"kernel_size": 3, "factor": (0.0, 3.0), "dtype": "float32"},
),
(
"RandomJpegQuality",
layers.RandomJpegQuality,
{"factor": (75, 100), "dtype": "float32"},
),
(
"RandomRotation",
layers.RandomRotation,
{
"factor": 0.5,
"bounding_box_format": "xyxy",
"dtype": "float32",
},
),
("RandomJpegQuality", layers.RandomJpegQuality, {"factor": (75, 100)}),
("RandomRotation", layers.RandomRotation, {"factor": 0.5}),
("RandomSaturation", layers.RandomSaturation, {"factor": 0.5}),
(
"RandomSharpness",
layers.RandomSharpness,
{"factor": 0.5, "value_range": (0, 255)},
),
("RandomAspectRatio", layers.RandomAspectRatio, {"factor": (0.9, 1.1)}),
("RandomShear", layers.RandomShear, {"x_factor": 0.3, "x_factor": 0.3}),
(
"RandomAspectRatio",
layers.RandomAspectRatio,
{
"factor": (0.9, 1.1),
"bounding_box_format": "xyxy",
"dtype": "float32",
},
),
(
"RandomShear",
layers.RandomShear,
{
"x_factor": 0.3,
"x_factor": 0.3,
"bounding_box_format": "xyxy",
"dtype": "float32",
},
),
("Solarization", layers.Solarization, {"value_range": (0, 255)}),
("Mosaic", layers.Mosaic, {}),
("CutMix", layers.CutMix, {}),
(
"Mosaic",
layers.Mosaic,
{"bounding_box_format": "xyxy"},
),
("CutMix", layers.CutMix, {"dtype": "float32"}),
("MixUp", layers.MixUp, {}),
(
"Resizing",
layers.Resizing,
{
"height": 224,
"width": 224,
"bounding_box_format": "xyxy",
"pad_to_aspect_ratio": True,
"dtype": "float32",
},
),
(
@@ -117,6 +164,7 @@
"target_size": (224, 224),
"scale_factor": (0.8, 1.25),
"bounding_box_format": "xywh",
"dtype": "float32",
},
),
(
@@ -127,7 +175,7 @@
(
"RandomCrop",
layers.RandomCrop,
{"height": 224, "width": 224},
{"height": 224, "width": 224, "bounding_box_format": "xyxy"},
),
(
"Rescaling",
@@ -145,6 +193,12 @@
layers.RandomHue,
]

NO_BOUNDING_BOXES_TESTS = [
layers.RandomCutout,
layers.RandomZoom,
layers.CutMix,
]


class WithMixedPrecisionTest(TestCase):
@parameterized.named_parameters(*TEST_CONFIGURATIONS)
@@ -162,8 +216,37 @@ def test_can_run_in_mixed_precision(self, layer_cls, init_args):
img = tf.random.uniform(
shape=(3, 512, 512, 3), minval=0, maxval=255, dtype=tf.float32
)
labels = tf.ones((3,), dtype=tf.float16)
inputs = {"images": img, "labels": labels}

bounding_boxes = {
"boxes": tf.convert_to_tensor(
[
[
[200, 200, 400, 400],
[250, 250, 450, 450],
[300, 300, 500, 500],
], # Bounding boxes for image 1
[
[100, 100, 300, 300],
[150, 150, 350, 350],
[200, 200, 400, 400],
], # Bounding boxes for image 2
[
[300, 300, 500, 500],
[350, 350, 550, 550],
[400, 400, 600, 600],
], # Bounding boxes for image 3
],
dtype=tf.float32,
),
"classes": tf.ones((3, 3), dtype=tf.float32),
}

inputs = {"images": img}

if layer_cls in NO_BOUNDING_BOXES_TESTS:
inputs["labels"] = bounding_boxes["classes"]
else:
inputs["bounding_boxes"] = bounding_boxes

layer = layer_cls(**init_args)
layer(inputs)
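The test only checks that the call completes under the policy; it does not assert output dtypes. If one wanted to sanity-check the dtype behaviour separately, a small standalone sketch follows (an expectation, not a guarantee for every layer, since several configs above pin dtype="float32"):

```python
import tensorflow as tf
from tensorflow import keras

import keras_cv

keras.mixed_precision.set_global_policy("mixed_float16")

layer = keras_cv.layers.Grayscale()
images = tf.random.uniform((2, 64, 64, 3), maxval=255.0)
outputs = layer({"images": images, "labels": tf.ones((2,))})

# With compute_dtype float16 and no explicit dtype="float32" in the config,
# the augmented images are expected to come back as float16.
print(outputs["images"].dtype)
```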
