Expunge include_rescaling from backbones
Since our models include built-in preprocessing, it is much clearer for
this rescaling to happen in the preprocessing layers.
mattdangerw committed Sep 21, 2024
1 parent c4840fa commit cf4a335
Showing 32 changed files with 154 additions and 157 deletions.
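At a high level, the commit moves rescaling out of backbone constructors and into the image converter. A minimal before/after sketch of the resulting usage (argument values and the `keras_hub.layers.ResizingImageConverter` export path are illustrative assumptions, not part of this commit):

```python
import numpy as np
import keras_hub

images = np.random.uniform(0, 255, size=(1, 224, 224, 3)).astype("float32")

# Before: the backbone rescaled inputs itself via `include_rescaling=True`.
# After: rescaling is declared on the preprocessing layer instead.
converter = keras_hub.layers.ResizingImageConverter(
    height=224, width=224, scale=1 / 255.0
)
backbone = keras_hub.models.DenseNetBackbone(
    stackwise_num_repeats=[6, 12, 24, 16],
)
features = backbone(converter(images))
```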
62 changes: 56 additions & 6 deletions keras_hub/src/layers/preprocessing/resizing_image_converter.py
@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import keras
+from keras import ops
 
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
+from keras_hub.src.utils.keras_utils import standardize_data_format
 from keras_hub.src.utils.tensor_utils import preprocessing_function


@@ -23,13 +25,23 @@ class ResizingImageConverter(ImageConverter):
     """An `ImageConverter` that simply resizes the input image.
 
     The `ResizingImageConverter` is a subclass of `ImageConverter` for models
-    that simply need to resize image tensors before using them for modeling.
-    The layer will take as input a raw image tensor (batched or unbatched) in the
-    channels last or channels first format, and output a resize tensor.
+    that need to resize (and optionally rescale) image tensors before using them
+    for modeling. The layer will take as input a raw image tensor (batched or
+    unbatched) in the channels last or channels first format, and output a
+    resized tensor.
 
     Args:
-        height: Integer, the height of the output shape.
-        width: Integer, the width of the output shape.
+        height: int, the height of the output shape.
+        width: int, the width of the output shape.
+        scale: float or `None`. If set, the image will be rescaled with a
+            `keras.layers.Rescaling` layer, multiplying the image by this
+            scale.
+        mean: tuple of floats per channel or `None`. If set, the image will be
+            normalized per channel by subtracting `mean`.
+            If set, also set `variance`.
+        variance: tuple of floats per channel or `None`. If set, the image will
+            be normalized per channel by dividing by `sqrt(variance)`.
+            If set, also set `mean`.
         crop_to_aspect_ratio: If `True`, resize the images without aspect
             ratio distortion. When the original aspect ratio differs
             from the target aspect ratio, the output image will be
@@ -64,6 +76,9 @@ def __init__(
         self,
         height,
         width,
+        scale=None,
+        mean=None,
+        variance=None,
         crop_to_aspect_ratio=True,
         interpolation="bilinear",
         data_format=None,
@@ -78,15 +93,47 @@ def __init__(
             crop_to_aspect_ratio=crop_to_aspect_ratio,
             interpolation=interpolation,
             data_format=data_format,
+            dtype=self.dtype_policy,
             name="resizing",
         )
+        if scale is not None:
+            self.rescaling = keras.layers.Rescaling(
+                scale=scale,
+                dtype=self.dtype_policy,
+                name="rescaling",
+            )
+        else:
+            self.rescaling = None
+        if (mean is not None) != (variance is not None):
+            raise ValueError(
+                "Both `mean` and `variance` should be set or `None`. Received "
+                f"`mean={mean}`, `variance={variance}`."
+            )
+        self.scale = scale
+        self.mean = mean
+        self.variance = variance
+        self.data_format = standardize_data_format(data_format)
 
     def image_size(self):
         """Returns the preprocessed size of a single image."""
         return (self.resizing.height, self.resizing.width)
 
     @preprocessing_function
     def call(self, inputs):
-        return self.resizing(inputs)
+        x = self.resizing(inputs)
+        if self.rescaling:
+            x = self.rescaling(x)
+        if self.mean is not None:
+            # Avoid `layers.Normalization` so this works batched and unbatched.
+            channels_first = self.data_format == "channels_first"
+            if len(ops.shape(inputs)) == 3:
+                broadcast_dims = (1, 2) if channels_first else (0, 1)
+            else:
+                broadcast_dims = (0, 2, 3) if channels_first else (0, 1, 2)
+            mean = ops.expand_dims(ops.array(self.mean), broadcast_dims)
+            std = ops.expand_dims(ops.sqrt(self.variance), broadcast_dims)
+            x = (x - mean) / std
+        return x
 
     def get_config(self):
         config = super().get_config()
@@ -96,6 +143,9 @@ def get_config(self):
                 "width": self.resizing.width,
                 "interpolation": self.resizing.interpolation,
                 "crop_to_aspect_ratio": self.resizing.crop_to_aspect_ratio,
+                "scale": self.scale,
+                "mean": self.mean,
+                "variance": self.variance,
             }
         )
         return config
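For reference, the per-channel normalization added to `call` is plain standardization. A standalone NumPy sketch of the unbatched, channels-last case (mirroring, not calling, the layer):

```python
import numpy as np

def normalize(x, mean, variance, scale=None):
    # x: (height, width, channels), already resized.
    if scale is not None:
        x = x * scale  # e.g. map [0, 255] to [0, 1] with scale=1/255
    # Broadcast per-channel stats over the spatial dims, matching
    # `ops.expand_dims(..., (0, 1))` in the layer above.
    mean = np.asarray(mean)[None, None, :]
    std = np.sqrt(np.asarray(variance))[None, None, :]
    return (x - mean) / std

x = np.full((4, 4, 3), 128.0)
print(normalize(x, (0.5, 0.7, 0.3), (0.25, 0.1, 0.5), scale=1 / 255.0)[0, 0])
# -> approximately [ 0.003922 -0.626255  0.285616]
```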
keras_hub/src/layers/preprocessing/resizing_image_converter_test.py
@@ -22,22 +22,57 @@
 
 
 class ResizingImageConverterTest(TestCase):
-    def test_resize_simple(self):
-        converter = ResizingImageConverter(height=4, width=4)
-        inputs = np.ones((10, 10, 3))
-        outputs = converter(inputs)
-        self.assertAllClose(outputs, ops.ones((4, 4, 3)))
-
     def test_resize_one(self):
-        converter = ResizingImageConverter(22, 22)
-        test_image = np.random.rand(10, 10, 3) * 255
-        shape = ops.shape(converter(test_image))
-        self.assertEqual(shape, (22, 22, 3))
+        converter = ResizingImageConverter(
+            height=4,
+            width=4,
+            mean=(0.5, 0.7, 0.3),
+            variance=(0.25, 0.1, 0.5),
+            scale=1 / 255.0,
+        )
+        inputs = np.ones((10, 10, 3)) * 128
+        outputs = converter(inputs)
+        self.assertEqual(ops.shape(outputs), (4, 4, 3))
+        self.assertAllClose(outputs[:, :, 0], np.ones((4, 4)) * 0.003922)
+        self.assertAllClose(outputs[:, :, 1], np.ones((4, 4)) * -0.626255)
+        self.assertAllClose(outputs[:, :, 2], np.ones((4, 4)) * 0.285616)
 
     def test_resize_batch(self):
-        converter = ResizingImageConverter(12, 12)
-        test_batch = np.random.rand(4, 10, 20, 3) * 255
-        shape = ops.shape(converter(test_batch))
-        self.assertEqual(shape, (4, 12, 12, 3))
+        converter = ResizingImageConverter(
+            height=4,
+            width=4,
+            mean=(0.5, 0.7, 0.3),
+            variance=(0.25, 0.1, 0.5),
+            scale=1 / 255.0,
+        )
+        inputs = np.ones((2, 10, 10, 3)) * 128
+        outputs = converter(inputs)
+        self.assertEqual(ops.shape(outputs), (2, 4, 4, 3))
+        self.assertAllClose(outputs[:, :, :, 0], np.ones((2, 4, 4)) * 0.003922)
+        self.assertAllClose(outputs[:, :, :, 1], np.ones((2, 4, 4)) * -0.626255)
+        self.assertAllClose(outputs[:, :, :, 2], np.ones((2, 4, 4)) * 0.285616)
 
+    def test_errors(self):
+        with self.assertRaises(ValueError):
+            ResizingImageConverter(
+                height=4,
+                width=4,
+                mean=(0.5, 0.7, 0.3),
+            )
 
     def test_config(self):
         converter = ResizingImageConverter(
             width=12,
             height=20,
+            mean=(0.5, 0.7, 0.3),
+            variance=(0.25, 0.1, 0.5),
+            scale=1 / 255.0,
             crop_to_aspect_ratio=False,
             interpolation="nearest",
         )
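Those expected constants follow directly from the arithmetic shown earlier (128/255 ≈ 0.50196, standardized per channel). The new `scale`/`mean`/`variance` keys should also survive a config round trip; a hedged sketch assuming standard Keras `get_config`/`from_config` semantics:

```python
converter = ResizingImageConverter(
    height=20,
    width=12,
    scale=1 / 255.0,
    mean=(0.5, 0.7, 0.3),
    variance=(0.25, 0.1, 0.5),
)
restored = ResizingImageConverter.from_config(converter.get_config())
assert restored.scale == converter.scale
assert restored.mean == converter.mean
assert restored.variance == converter.variance
```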
12 changes: 1 addition & 11 deletions keras_hub/src/models/csp_darknet/csp_darknet_backbone.py
@@ -31,9 +31,6 @@ class CSPDarkNetBackbone(FeaturePyramidBackbone):
             level in the model.
         stackwise_depth: A list of ints, the depth for each dark level in the
             model.
-        include_rescaling: boolean. If `True`, rescale the input using
-            `Rescaling(1 / 255.0)` layer. If `False`, do nothing. Defaults to
-            `True`.
         block_type: str. One of `"basic_block"` or `"depthwise_block"`.
             Use `"depthwise_block"` for depthwise conv block
             `"basic_block"` for basic conv block.
@@ -55,7 +52,6 @@ class CSPDarkNetBackbone(FeaturePyramidBackbone):
     model = keras_hub.models.CSPDarkNetBackbone(
         stackwise_num_filters=[128, 256, 512, 1024],
         stackwise_depth=[3, 9, 9, 3],
-        include_rescaling=False,
     )
     model(input_data)
     ```
@@ -65,7 +61,6 @@ def __init__(
         self,
         stackwise_num_filters,
         stackwise_depth,
-        include_rescaling=True,
         block_type="basic_block",
         image_shape=(None, None, 3),
         **kwargs,
@@ -82,10 +77,7 @@ def __init__(
         base_channels = stackwise_num_filters[0] // 2
 
         image_input = layers.Input(shape=image_shape)
-        x = image_input
-        if include_rescaling:
-            x = layers.Rescaling(scale=1 / 255.0)(x)
-
+        x = image_input  # Intermediate result.
         x = apply_focus(channel_axis, name="stem_focus")(x)
         x = apply_darknet_conv_block(
             base_channels,
@@ -130,7 +122,6 @@
         # === Config ===
         self.stackwise_num_filters = stackwise_num_filters
         self.stackwise_depth = stackwise_depth
-        self.include_rescaling = include_rescaling
         self.block_type = block_type
         self.image_shape = image_shape
         self.pyramid_outputs = pyramid_outputs
@@ -141,7 +132,6 @@ def get_config(self):
             {
                 "stackwise_num_filters": self.stackwise_num_filters,
                 "stackwise_depth": self.stackwise_depth,
-                "include_rescaling": self.include_rescaling,
                 "block_type": self.block_type,
                 "image_shape": self.image_shape,
             }
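The backbone edits here are mechanical, and the remaining backbones in the commit follow the same pattern: drop the `include_rescaling` argument, its `Rescaling` branch, and its config entry. Callers that relied on the built-in rescaling can apply it explicitly; a sketch (shapes and values illustrative):

```python
import numpy as np
from keras import layers
import keras_hub

backbone = keras_hub.models.CSPDarkNetBackbone(
    stackwise_num_filters=[128, 256, 512, 1024],
    stackwise_depth=[3, 9, 9, 3],
)
images = np.random.uniform(0, 255, size=(2, 224, 224, 3)).astype("float32")
# Rescaling now happens outside the backbone.
features = backbone(layers.Rescaling(1 / 255.0)(images))
```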
keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py
@@ -76,7 +76,6 @@ class CSPDarkNetImageClassifier(ImageClassifier):
     backbone = keras_hub.models.CSPDarkNetBackbone(
         stackwise_num_filters=[128, 256, 512, 1024],
         stackwise_depth=[3, 9, 9, 3],
-        include_rescaling=False,
         block_type="basic_block",
         image_shape=(224, 224, 3),
     )
keras_hub/src/models/csp_darknet/csp_darknet_image_classifier_test.py
@@ -31,7 +31,6 @@ def setUp(self):
         self.backbone = CSPDarkNetBackbone(
             stackwise_num_filters=[2, 16, 16],
             stackwise_depth=[1, 3, 3, 1],
-            include_rescaling=False,
             block_type="basic_block",
             image_shape=(16, 16, 3),
         )
12 changes: 1 addition & 11 deletions keras_hub/src/models/densenet/densenet_backbone.py
@@ -31,9 +31,6 @@ class DenseNetBackbone(FeaturePyramidBackbone):
     Args:
         stackwise_num_repeats: list of ints, number of repeated convolutional
             blocks per dense block.
-        include_rescaling: bool, whether to rescale the inputs. If set
-            to `True`, inputs will be passed through a `Rescaling(1/255.0)`
-            layer. Defaults to `True`.
         image_shape: optional shape tuple, defaults to (None, None, 3).
         compression_ratio: float, compression rate at transition layers,
             defaults to 0.5.
@@ -51,7 +48,6 @@ class DenseNetBackbone(FeaturePyramidBackbone):
     # Randomly initialized backbone with a custom config
     model = keras_hub.models.DenseNetBackbone(
         stackwise_num_repeats=[6, 12, 24, 16],
-        include_rescaling=False,
     )
     model(input_data)
     ```
@@ -60,7 +56,6 @@ class DenseNetBackbone(FeaturePyramidBackbone):
     def __init__(
         self,
         stackwise_num_repeats,
-        include_rescaling=True,
         image_shape=(None, None, 3),
         compression_ratio=0.5,
         growth_rate=32,
@@ -71,10 +66,7 @@ def __init__(
         channel_axis = -1 if data_format == "channels_last" else 1
         image_input = keras.layers.Input(shape=image_shape)
 
-        x = image_input
-        if include_rescaling:
-            x = keras.layers.Rescaling(1 / 255.0)(x)
-
+        x = image_input  # Intermediate result.
         x = keras.layers.Conv2D(
             64,
             7,
@@ -124,7 +116,6 @@ def __init__(
 
         # === Config ===
         self.stackwise_num_repeats = stackwise_num_repeats
-        self.include_rescaling = include_rescaling
         self.compression_ratio = compression_ratio
         self.growth_rate = growth_rate
         self.image_shape = image_shape
@@ -135,7 +126,6 @@ def get_config(self):
         config.update(
             {
                 "stackwise_num_repeats": self.stackwise_num_repeats,
-                "include_rescaling": self.include_rescaling,
                 "compression_ratio": self.compression_ratio,
                 "growth_rate": self.growth_rate,
                 "image_shape": self.image_shape,
1 change: 0 additions & 1 deletion keras_hub/src/models/densenet/densenet_image_classifier.py
@@ -74,7 +74,6 @@ class DenseNetImageClassifier(ImageClassifier):
     backbone = keras_hub.models.DenseNetBackbone(
         stackwise_num_repeats=[6, 12, 24, 16],
-        include_rescaling=False,
         image_shape=(224, 224, 3),
     )
keras_hub/src/models/densenet/densenet_image_classifier_test.py
@@ -28,7 +28,6 @@ def setUp(self):
         self.labels = [0, 3]
         self.backbone = DenseNetBackbone(
             stackwise_num_repeats=[6, 12, 24, 16],
-            include_rescaling=True,
             compression_ratio=0.5,
             growth_rate=32,
             image_shape=(224, 224, 3),
17 changes: 3 additions & 14 deletions keras_hub/src/models/efficientnet/efficientnet_backbone.py
@@ -67,8 +67,6 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
             MBConvBlock, but instead of using a depthwise convolution and a
             1x1 output convolution, fused blocks use a single 3x3 convolution
             block.
-        include_rescaling: bool, whether to rescale the inputs. If set to
-            True, inputs will be passed through a `Rescaling(1/255.0)` layer.
         min_depth: integer, minimum number of filters. Can be None and ignored
             if use_depth_divisor_as_min_depth is set to True.
         include_initial_padding: bool, whether to include initial zero padding
@@ -96,7 +94,6 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
         stackwise_block_types=[["fused"] * 3 + ["unfused"] * 3],
         width_coefficient=1.0,
         depth_coefficient=1.0,
-        include_rescaling=False,
     )
     images = np.ones((1, 256, 256, 3))
     outputs = efficientnet.predict(images)
@@ -116,7 +113,6 @@ def __init__(
         stackwise_squeeze_and_excite_ratios,
         stackwise_strides,
         stackwise_block_types,
-        include_rescaling=True,
         dropout=0.2,
         depth_divisor=8,
         min_depth=8,
@@ -129,14 +125,9 @@ def __init__(
         batch_norm_momentum=0.9,
         **kwargs,
     ):
-        img_input = keras.layers.Input(shape=input_shape)
-
-        x = img_input
-
-        if include_rescaling:
-            # Use common rescaling strategy across keras
-            x = keras.layers.Rescaling(scale=1.0 / 255.0)(x)
+        image_input = keras.layers.Input(shape=input_shape)
 
+        x = image_input  # Intermediate result.
         if include_initial_padding:
             x = keras.layers.ZeroPadding2D(
                 padding=self._correct_pad_downsample(x, 3),
@@ -282,10 +273,6 @@ def __init__(
             curr_pyramid_level += 1
 
         # Create model.
-        super().__init__(inputs=img_input, outputs=x, **kwargs)
+        super().__init__(inputs=image_input, outputs=x, **kwargs)
 
         # === Config ===
-        self.include_rescaling = include_rescaling
         self.width_coefficient = width_coefficient
         self.depth_coefficient = depth_coefficient
         self.dropout = dropout
@@ -313,7 +303,6 @@ def get_config(self):
         config = super().get_config()
         config.update(
             {
-                "include_rescaling": self.include_rescaling,
                 "width_coefficient": self.width_coefficient,
                 "depth_coefficient": self.depth_coefficient,
                 "dropout": self.dropout,
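For models that previously baked ImageNet-style normalization into their pipelines, the converter's new arguments express it directly. An illustrative configuration (the statistics are the standard ImageNet values, not something this commit ships):

```python
converter = ResizingImageConverter(
    height=224,
    width=224,
    scale=1 / 255.0,
    # Standard ImageNet channel means and squared standard deviations;
    # the layer divides by `sqrt(variance)`, i.e. by the std.
    mean=(0.485, 0.456, 0.406),
    variance=(0.229**2, 0.224**2, 0.225**2),
)
```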