diff --git a/keras_cv/layers/preprocessing/base_image_augmentation_layer.py b/keras_cv/layers/preprocessing/base_image_augmentation_layer.py index c28698fb47..822f033d64 100644 --- a/keras_cv/layers/preprocessing/base_image_augmentation_layer.py +++ b/keras_cv/layers/preprocessing/base_image_augmentation_layer.py @@ -484,7 +484,9 @@ def _augment(self, inputs): image=raw_image, ) - bounding_boxes = bounding_box.to_ragged(bounding_boxes) + bounding_boxes = bounding_box.to_ragged( + bounding_boxes, dtype=self.compute_dtype + ) result[BOUNDING_BOXES] = bounding_boxes if keypoints is not None: diff --git a/keras_cv/layers/preprocessing/random_crop.py b/keras_cv/layers/preprocessing/random_crop.py index ff95bb7c17..be74ab6fab 100644 --- a/keras_cv/layers/preprocessing/random_crop.py +++ b/keras_cv/layers/preprocessing/random_crop.py @@ -78,11 +78,17 @@ def compute_ragged_image_signature(self, images): return ragged_spec def get_random_transformation_batch(self, batch_size, **kwargs): - tops = self._random_generator.random_uniform( - shape=(batch_size, 1), minval=0, maxval=1, dtype=tf.float32 + tops = tf.cast( + self._random_generator.random_uniform( + shape=(batch_size, 1), minval=0, maxval=1 + ), + self.compute_dtype, ) - lefts = self._random_generator.random_uniform( - shape=(batch_size, 1), minval=0, maxval=1, dtype=tf.float32 + lefts = tf.cast( + self._random_generator.random_uniform( + shape=(batch_size, 1), minval=0, maxval=1 + ), + self.compute_dtype, ) return {"tops": tops, "lefts": lefts} @@ -212,13 +218,14 @@ def _get_image_shape(self, images): def _crop_images(self, images, transformations): batch_size = tf.shape(images)[0] heights, widths = self._get_image_shape(images) - heights = tf.cast(heights, dtype=tf.float32) - widths = tf.cast(widths, dtype=tf.float32) + heights = tf.cast(heights, dtype=self.compute_dtype) + widths = tf.cast(widths, dtype=self.compute_dtype) tops = transformations["tops"] lefts = transformations["lefts"] x1s = lefts * (widths - self.width) y1s = tops * (heights - self.height) + x2s = x1s + self.width y2s = y1s + self.height # normalize @@ -229,8 +236,8 @@ def _crop_images(self, images, transformations): boxes = tf.concat([y1s, x1s, y2s, x2s], axis=-1) images = tf.image.crop_and_resize( - images, - boxes, + tf.cast(images, tf.float32), + tf.cast(boxes, tf.float32), tf.range(batch_size), [self.height, self.width], method="nearest", @@ -246,8 +253,8 @@ def _crop_bounding_boxes(self, images, boxes, transformation): tops = transformation["tops"] lefts = transformation["lefts"] heights, widths = self._get_image_shape(images) - heights = tf.cast(heights, dtype=tf.float32) - widths = tf.cast(widths, dtype=tf.float32) + heights = tf.cast(heights, dtype=self.compute_dtype) + widths = tf.cast(widths, dtype=self.compute_dtype) # compute offsets for xyxy bounding_boxes top_offsets = tf.cast( @@ -259,7 +266,9 @@ def _crop_bounding_boxes(self, images, boxes, transformation): dtype=self.compute_dtype, ) - x1s, y1s, x2s, y2s = tf.split(boxes, 4, axis=-1) + x1s, y1s, x2s, y2s = tf.split( + tf.cast(boxes, self.compute_dtype), 4, axis=-1 + ) x1s -= tf.expand_dims(left_offsets, axis=1) y1s -= tf.expand_dims(top_offsets, axis=1) x2s -= tf.expand_dims(left_offsets, axis=1) @@ -269,11 +278,13 @@ def _crop_bounding_boxes(self, images, boxes, transformation): def _resize_bounding_boxes(self, images, boxes): heights, widths = self._get_image_shape(images) - heights = tf.cast(heights, dtype=tf.float32) - widths = tf.cast(widths, dtype=tf.float32) + heights = tf.cast(heights, dtype=self.compute_dtype) + widths = tf.cast(widths, dtype=self.compute_dtype) x_scale = tf.cast(self.width / widths, dtype=self.compute_dtype) y_scale = tf.cast(self.height / heights, dtype=self.compute_dtype) - x1s, y1s, x2s, y2s = tf.split(boxes, 4, axis=-1) + x1s, y1s, x2s, y2s = tf.split( + tf.cast(boxes, self.compute_dtype), 4, axis=-1 + ) outputs = tf.concat( [ x1s * x_scale[:, tf.newaxis, :], diff --git a/keras_cv/layers/preprocessing/with_mixed_precision_test.py b/keras_cv/layers/preprocessing/with_mixed_precision_test.py index c1a6d63aab..0468575fe6 100644 --- a/keras_cv/layers/preprocessing/with_mixed_precision_test.py +++ b/keras_cv/layers/preprocessing/with_mixed_precision_test.py @@ -30,6 +30,8 @@ "target_size": (224, 224), "crop_area_factor": (0.8, 1.0), "aspect_ratio_factor": (3 / 4, 4 / 3), + "bounding_box_format": "xywh", + "dtype": "float32", }, ), ("Grayscale", layers.Grayscale, {}), @@ -50,7 +52,11 @@ layers.RandomCutout, {"height_factor": 0.2, "width_factor": 0.2}, ), - ("RandomFlip", layers.RandomFlip, {"mode": "horizontal"}), + ( + "RandomFlip", + layers.RandomFlip, + {"mode": "horizontal", "bounding_box_format": "xyxy"}, + ), ( "RandomHue", layers.RandomHue, @@ -59,7 +65,12 @@ ( "RandomTranslation", layers.RandomTranslation, - {"width_factor": 0.5, "height_factor": 0.5}, + { + "width_factor": 0.5, + "height_factor": 0.5, + "bounding_box_format": "xyxy", + "dtype": "float32", + }, ), ( "RandomChannelShift", @@ -86,21 +97,54 @@ ( "RandomGaussianBlur", layers.RandomGaussianBlur, - {"kernel_size": 3, "factor": (0.0, 3.0)}, + {"kernel_size": 3, "factor": (0.0, 3.0), "dtype": "float32"}, + ), + ( + "RandomJpegQuality", + layers.RandomJpegQuality, + {"factor": (75, 100), "dtype": "float32"}, + ), + ( + "RandomRotation", + layers.RandomRotation, + { + "factor": 0.5, + "bounding_box_format": "xyxy", + "dtype": "float32", + }, ), - ("RandomJpegQuality", layers.RandomJpegQuality, {"factor": (75, 100)}), - ("RandomRotation", layers.RandomRotation, {"factor": 0.5}), ("RandomSaturation", layers.RandomSaturation, {"factor": 0.5}), ( "RandomSharpness", layers.RandomSharpness, {"factor": 0.5, "value_range": (0, 255)}, ), - ("RandomAspectRatio", layers.RandomAspectRatio, {"factor": (0.9, 1.1)}), - ("RandomShear", layers.RandomShear, {"x_factor": 0.3, "x_factor": 0.3}), + ( + "RandomAspectRatio", + layers.RandomAspectRatio, + { + "factor": (0.9, 1.1), + "bounding_box_format": "xyxy", + "dtype": "float32", + }, + ), + ( + "RandomShear", + layers.RandomShear, + { + "x_factor": 0.3, + "x_factor": 0.3, + "bounding_box_format": "xyxy", + "dtype": "float32", + }, + ), ("Solarization", layers.Solarization, {"value_range": (0, 255)}), - ("Mosaic", layers.Mosaic, {}), - ("CutMix", layers.CutMix, {}), + ( + "Mosaic", + layers.Mosaic, + {"bounding_box_format": "xyxy"}, + ), + ("CutMix", layers.CutMix, {"dtype": "float32"}), ("MixUp", layers.MixUp, {}), ( "Resizing", @@ -108,6 +152,9 @@ { "height": 224, "width": 224, + "bounding_box_format": "xyxy", + "pad_to_aspect_ratio": True, + "dtype": "float32", }, ), ( @@ -117,6 +164,7 @@ "target_size": (224, 224), "scale_factor": (0.8, 1.25), "bounding_box_format": "xywh", + "dtype": "float32", }, ), ( @@ -127,7 +175,7 @@ ( "RandomCrop", layers.RandomCrop, - {"height": 224, "width": 224}, + {"height": 224, "width": 224, "bounding_box_format": "xyxy"}, ), ( "Rescaling", @@ -145,6 +193,12 @@ layers.RandomHue, ] +NO_BOUNDING_BOXES_TESTS = [ + layers.RandomCutout, + layers.RandomZoom, + layers.CutMix, +] + class WithMixedPrecisionTest(TestCase): @parameterized.named_parameters(*TEST_CONFIGURATIONS) @@ -162,8 +216,37 @@ def test_can_run_in_mixed_precision(self, layer_cls, init_args): img = tf.random.uniform( shape=(3, 512, 512, 3), minval=0, maxval=255, dtype=tf.float32 ) - labels = tf.ones((3,), dtype=tf.float16) - inputs = {"images": img, "labels": labels} + + bounding_boxes = { + "boxes": tf.convert_to_tensor( + [ + [ + [200, 200, 400, 400], + [250, 250, 450, 450], + [300, 300, 500, 500], + ], # Bounding boxes for image 1 + [ + [100, 100, 300, 300], + [150, 150, 350, 350], + [200, 200, 400, 400], + ], # Bounding boxes for image 2 + [ + [300, 300, 500, 500], + [350, 350, 550, 550], + [400, 400, 600, 600], + ], + ], # Bounding boxes for image 3 + dtype=tf.float32, + ), + "classes": tf.ones((3, 3), dtype=tf.float32), + } + + inputs = {"images": img} + + if layer_cls in NO_BOUNDING_BOXES_TESTS: + inputs["labels"] = bounding_boxes["classes"] + else: + inputs["bounding_boxes"] = bounding_boxes layer = layer_cls(**init_args) layer(inputs)