From cf4a335425ffa99bd3d3e372d7105989d704a74d Mon Sep 17 00:00:00 2001 From: Matt Watson Date: Fri, 20 Sep 2024 18:14:25 -0700 Subject: [PATCH] Expunge include_rescaling from backbones Since our models include built in preprocessing, it is much clearer for this rescaling to happen in the preprocessing layers. --- .../preprocessing/resizing_image_converter.py | 62 +++++++++++++++++-- .../resizing_image_converter_test.py | 51 ++++++++++++--- .../csp_darknet/csp_darknet_backbone.py | 12 +--- .../csp_darknet_image_classifier.py | 1 - .../csp_darknet_image_classifier_test.py | 1 - .../src/models/densenet/densenet_backbone.py | 12 +--- .../densenet/densenet_image_classifier.py | 1 - .../densenet_image_classifier_test.py | 1 - .../efficientnet/efficientnet_backbone.py | 17 +---- .../efficientnet_backbone_test.py | 8 --- .../mix_transformer_backbone.py | 12 +--- .../mix_transformer_backbone_test.py | 1 - .../mix_transformer_classifier.py | 1 - .../mix_transformer_classifier_test.py | 1 - .../models/mobilenet/mobilenet_backbone.py | 17 +---- .../mobilenet/mobilenet_backbone_test.py | 1 - .../mobilenet/mobilenet_image_classifier.py | 1 - .../mobilenet_image_classifier_test.py | 1 - .../src/models/pali_gemma/pali_gemma_vit.py | 3 + .../src/models/resnet/resnet_backbone.py | 22 +------ .../models/resnet/resnet_image_classifier.py | 1 - .../resnet/resnet_image_classifier_test.py | 3 +- keras_hub/src/models/resnet/resnet_presets.py | 12 ++-- keras_hub/src/models/vgg/vgg_backbone.py | 8 --- keras_hub/src/models/vgg/vgg_backbone_test.py | 1 - .../src/models/vgg/vgg_image_classifier.py | 1 - .../models/vgg/vgg_image_classifier_test.py | 1 - .../src/models/vit_det/vit_det_backbone.py | 9 --- .../models/vit_det/vit_det_backbone_test.py | 1 - keras_hub/src/utils/timm/convert_resnet.py | 8 --- keras_hub/src/utils/timm/preset_loader.py | 17 ++++- .../convert_resnet_checkpoints.py | 23 +++++-- 32 files changed, 154 insertions(+), 157 deletions(-) diff --git a/keras_hub/src/layers/preprocessing/resizing_image_converter.py b/keras_hub/src/layers/preprocessing/resizing_image_converter.py index cfce694b65..f5e044c886 100644 --- a/keras_hub/src/layers/preprocessing/resizing_image_converter.py +++ b/keras_hub/src/layers/preprocessing/resizing_image_converter.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. import keras +from keras import ops from keras_hub.src.api_export import keras_hub_export from keras_hub.src.layers.preprocessing.image_converter import ImageConverter +from keras_hub.src.utils.keras_utils import standardize_data_format from keras_hub.src.utils.tensor_utils import preprocessing_function @@ -23,13 +25,23 @@ class ResizingImageConverter(ImageConverter): """An `ImageConverter` that simply resizes the input image. The `ResizingImageConverter` is a subclass of `ImageConverter` for models - that simply need to resize image tensors before using them for modeling. - The layer will take as input a raw image tensor (batched or unbatched) in the - channels last or channels first format, and output a resize tensor. + that need to resize (and optionally rescale) image tensors before using them + for modeling. The layer will take as input a raw image tensor (batched or + unbatched) in the channels last or channels first format, and output a + resize tensor. Args: - height: Integer, the height of the output shape. - width: Integer, the width of the output shape. + height: int, the height of the output shape. 
+ width: int, the width of the output shape. + scale: float or `None`. If set, the image we be rescaled with a + `keras.layers.Rescaling` layer, multiplying the image by this + scale. + mean: tuples of floats per channel or `None`. If set, the image will be + normalized per channel by subtracting mean. + If set, also set `variance`. + variance: tuples of floats per channel or `None`. If set, the image will + be normalized per channel by dividing by `sqrt(variance)`. + If set, also set `mean`. crop_to_aspect_ratio: If `True`, resize the images without aspect ratio distortion. When the original aspect ratio differs from the target aspect ratio, the output image will be @@ -64,6 +76,9 @@ def __init__( self, height, width, + scale=None, + mean=None, + variance=None, crop_to_aspect_ratio=True, interpolation="bilinear", data_format=None, @@ -78,7 +93,26 @@ def __init__( crop_to_aspect_ratio=crop_to_aspect_ratio, interpolation=interpolation, data_format=data_format, + dtype=self.dtype_policy, + name="resizing", ) + if scale is not None: + self.rescaling = keras.layers.Rescaling( + scale=scale, + dtype=self.dtype_policy, + name="rescaling", + ) + else: + self.rescaling = None + if (mean is not None) != (variance is not None): + raise ValueError( + "Both `mean` and `variance` should be set or `None`. Received " + f"`mean={mean}`, `variance={variance}`." + ) + self.scale = scale + self.mean = mean + self.variance = variance + self.data_format = standardize_data_format(data_format) def image_size(self): """Returns the preprocessed size of a single image.""" @@ -86,7 +120,20 @@ def image_size(self): @preprocessing_function def call(self, inputs): - return self.resizing(inputs) + x = self.resizing(inputs) + if self.rescaling: + x = self.rescaling(x) + if self.mean is not None: + # Avoid `layers.Normalization` so this works batched and unbatched. 
+ channels_first = self.data_format == "channels_first" + if len(ops.shape(inputs)) == 3: + broadcast_dims = (1, 2) if channels_first else (0, 1) + else: + broadcast_dims = (0, 2, 3) if channels_first else (0, 1, 2) + mean = ops.expand_dims(ops.array(self.mean), broadcast_dims) + std = ops.expand_dims(ops.sqrt(self.variance), broadcast_dims) + x = (x - mean) / std + return x def get_config(self): config = super().get_config() @@ -96,6 +143,9 @@ def get_config(self): "width": self.resizing.width, "interpolation": self.resizing.interpolation, "crop_to_aspect_ratio": self.resizing.crop_to_aspect_ratio, + "scale": self.scale, + "mean": self.mean, + "variance": self.variance, } ) return config diff --git a/keras_hub/src/layers/preprocessing/resizing_image_converter_test.py b/keras_hub/src/layers/preprocessing/resizing_image_converter_test.py index 857cf578a8..b54b0a0d94 100644 --- a/keras_hub/src/layers/preprocessing/resizing_image_converter_test.py +++ b/keras_hub/src/layers/preprocessing/resizing_image_converter_test.py @@ -22,22 +22,57 @@ class ResizingImageConverterTest(TestCase): + def test_resize_simple(self): + converter = ResizingImageConverter(height=4, width=4) + inputs = np.ones((10, 10, 3)) + outputs = converter(inputs) + self.assertAllClose(outputs, ops.ones((4, 4, 3))) + def test_resize_one(self): - converter = ResizingImageConverter(22, 22) - test_image = np.random.rand(10, 10, 3) * 255 - shape = ops.shape(converter(test_image)) - self.assertEqual(shape, (22, 22, 3)) + converter = ResizingImageConverter( + height=4, + width=4, + mean=(0.5, 0.7, 0.3), + variance=(0.25, 0.1, 0.5), + scale=1 / 255.0, + ) + inputs = np.ones((10, 10, 3)) * 128 + outputs = converter(inputs) + self.assertEqual(ops.shape(outputs), (4, 4, 3)) + self.assertAllClose(outputs[:, :, 0], np.ones((4, 4)) * 0.003922) + self.assertAllClose(outputs[:, :, 1], np.ones((4, 4)) * -0.626255) + self.assertAllClose(outputs[:, :, 2], np.ones((4, 4)) * 0.285616) def test_resize_batch(self): - converter = ResizingImageConverter(12, 12) - test_batch = np.random.rand(4, 10, 20, 3) * 255 - shape = ops.shape(converter(test_batch)) - self.assertEqual(shape, (4, 12, 12, 3)) + converter = ResizingImageConverter( + height=4, + width=4, + mean=(0.5, 0.7, 0.3), + variance=(0.25, 0.1, 0.5), + scale=1 / 255.0, + ) + inputs = np.ones((2, 10, 10, 3)) * 128 + outputs = converter(inputs) + self.assertEqual(ops.shape(outputs), (2, 4, 4, 3)) + self.assertAllClose(outputs[:, :, :, 0], np.ones((2, 4, 4)) * 0.003922) + self.assertAllClose(outputs[:, :, :, 1], np.ones((2, 4, 4)) * -0.626255) + self.assertAllClose(outputs[:, :, :, 2], np.ones((2, 4, 4)) * 0.285616) + + def test_errors(self): + with self.assertRaises(ValueError): + ResizingImageConverter( + height=4, + width=4, + mean=(0.5, 0.7, 0.3), + ) def test_config(self): converter = ResizingImageConverter( width=12, height=20, + mean=(0.5, 0.7, 0.3), + variance=(0.25, 0.1, 0.5), + scale=1 / 255.0, crop_to_aspect_ratio=False, interpolation="nearest", ) diff --git a/keras_hub/src/models/csp_darknet/csp_darknet_backbone.py b/keras_hub/src/models/csp_darknet/csp_darknet_backbone.py index ab33823405..bf12c9d7f3 100644 --- a/keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +++ b/keras_hub/src/models/csp_darknet/csp_darknet_backbone.py @@ -31,9 +31,6 @@ class CSPDarkNetBackbone(FeaturePyramidBackbone): level in the model. stackwise_depth: A list of ints, the depth for each dark level in the model. - include_rescaling: boolean. 
If `True`, rescale the input using - `Rescaling(1 / 255.0)` layer. If `False`, do nothing. Defaults to - `True`. block_type: str. One of `"basic_block"` or `"depthwise_block"`. Use `"depthwise_block"` for depthwise conv block `"basic_block"` for basic conv block. @@ -55,7 +52,6 @@ class CSPDarkNetBackbone(FeaturePyramidBackbone): model = keras_hub.models.CSPDarkNetBackbone( stackwise_num_filters=[128, 256, 512, 1024], stackwise_depth=[3, 9, 9, 3], - include_rescaling=False, ) model(input_data) ``` @@ -65,7 +61,6 @@ def __init__( self, stackwise_num_filters, stackwise_depth, - include_rescaling=True, block_type="basic_block", image_shape=(None, None, 3), **kwargs, @@ -82,10 +77,7 @@ def __init__( base_channels = stackwise_num_filters[0] // 2 image_input = layers.Input(shape=image_shape) - x = image_input - if include_rescaling: - x = layers.Rescaling(scale=1 / 255.0)(x) - + x = image_input # Intermediate result. x = apply_focus(channel_axis, name="stem_focus")(x) x = apply_darknet_conv_block( base_channels, @@ -130,7 +122,6 @@ def __init__( # === Config === self.stackwise_num_filters = stackwise_num_filters self.stackwise_depth = stackwise_depth - self.include_rescaling = include_rescaling self.block_type = block_type self.image_shape = image_shape self.pyramid_outputs = pyramid_outputs @@ -141,7 +132,6 @@ def get_config(self): { "stackwise_num_filters": self.stackwise_num_filters, "stackwise_depth": self.stackwise_depth, - "include_rescaling": self.include_rescaling, "block_type": self.block_type, "image_shape": self.image_shape, } diff --git a/keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py b/keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py index 28069e7d9f..4a7d4719e3 100644 --- a/keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py +++ b/keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py @@ -76,7 +76,6 @@ class CSPDarkNetImageClassifier(ImageClassifier): backbone = keras_hub.models.CSPDarkNetBackbone( stackwise_num_filters=[128, 256, 512, 1024], stackwise_depth=[3, 9, 9, 3], - include_rescaling=False, block_type="basic_block", image_shape = (224, 224, 3), ) diff --git a/keras_hub/src/models/csp_darknet/csp_darknet_image_classifier_test.py b/keras_hub/src/models/csp_darknet/csp_darknet_image_classifier_test.py index f3735be2fe..c67685b763 100644 --- a/keras_hub/src/models/csp_darknet/csp_darknet_image_classifier_test.py +++ b/keras_hub/src/models/csp_darknet/csp_darknet_image_classifier_test.py @@ -31,7 +31,6 @@ def setUp(self): self.backbone = CSPDarkNetBackbone( stackwise_num_filters=[2, 16, 16], stackwise_depth=[1, 3, 3, 1], - include_rescaling=False, block_type="basic_block", image_shape=(16, 16, 3), ) diff --git a/keras_hub/src/models/densenet/densenet_backbone.py b/keras_hub/src/models/densenet/densenet_backbone.py index 2b840011d1..8778f27d76 100644 --- a/keras_hub/src/models/densenet/densenet_backbone.py +++ b/keras_hub/src/models/densenet/densenet_backbone.py @@ -31,9 +31,6 @@ class DenseNetBackbone(FeaturePyramidBackbone): Args: stackwise_num_repeats: list of ints, number of repeated convolutional blocks per dense block. - include_rescaling: bool, whether to rescale the inputs. If set - to `True`, inputs will be passed through a `Rescaling(1/255.0)` - layer. Defaults to `True`. image_shape: optional shape tuple, defaults to (None, None, 3). compression_ratio: float, compression rate at transition layers, defaults to 0.5. 
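For reviewers, a minimal sketch of the intended usage after this change (illustrative only, not a hunk in this patch): instead of asking the backbone to rescale via `include_rescaling`, the `1 / 255.0` rescale is configured on the image converter, and the backbone consumes the converted tensor as-is. The import paths follow the file locations in this diff; the 224x224 target size and the input shapes are arbitrary choices for the example.

```python
import numpy as np

from keras_hub.src.layers.preprocessing.resizing_image_converter import (
    ResizingImageConverter,
)
from keras_hub.src.models.densenet.densenet_backbone import DenseNetBackbone

# Previously: DenseNetBackbone(..., include_rescaling=True) rescaled in-graph.
# Now the converter owns the rescale and the backbone takes inputs as-is.
converter = ResizingImageConverter(
    height=224,
    width=224,
    scale=1 / 255.0,  # Multiply raw [0, 255] pixel values down to [0, 1].
)
backbone = DenseNetBackbone(stackwise_num_repeats=[6, 12, 24, 16])

raw_images = np.random.randint(0, 256, size=(2, 300, 300, 3)).astype("float32")
features = backbone(converter(raw_images))
print(features.shape)
```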
@@ -51,7 +48,6 @@ class DenseNetBackbone(FeaturePyramidBackbone): # Randomly initialized backbone with a custom config model = keras_hub.models.DenseNetBackbone( stackwise_num_repeats=[6, 12, 24, 16], - include_rescaling=False, ) model(input_data) ``` @@ -60,7 +56,6 @@ class DenseNetBackbone(FeaturePyramidBackbone): def __init__( self, stackwise_num_repeats, - include_rescaling=True, image_shape=(None, None, 3), compression_ratio=0.5, growth_rate=32, @@ -71,10 +66,7 @@ def __init__( channel_axis = -1 if data_format == "channels_last" else 1 image_input = keras.layers.Input(shape=image_shape) - x = image_input - if include_rescaling: - x = keras.layers.Rescaling(1 / 255.0)(x) - + x = image_input # Intermediate result. x = keras.layers.Conv2D( 64, 7, @@ -124,7 +116,6 @@ def __init__( # === Config === self.stackwise_num_repeats = stackwise_num_repeats - self.include_rescaling = include_rescaling self.compression_ratio = compression_ratio self.growth_rate = growth_rate self.image_shape = image_shape @@ -135,7 +126,6 @@ def get_config(self): config.update( { "stackwise_num_repeats": self.stackwise_num_repeats, - "include_rescaling": self.include_rescaling, "compression_ratio": self.compression_ratio, "growth_rate": self.growth_rate, "image_shape": self.image_shape, diff --git a/keras_hub/src/models/densenet/densenet_image_classifier.py b/keras_hub/src/models/densenet/densenet_image_classifier.py index 6bd7bbbaa1..c727106f42 100644 --- a/keras_hub/src/models/densenet/densenet_image_classifier.py +++ b/keras_hub/src/models/densenet/densenet_image_classifier.py @@ -74,7 +74,6 @@ class DenseNetImageClassifier(ImageClassifier): backbone = keras_hub.models.DenseNetBackbone( stackwise_num_filters=[128, 256, 512, 1024], stackwise_depth=[3, 9, 9, 3], - include_rescaling=False, block_type="basic_block", image_shape = (224, 224, 3), ) diff --git a/keras_hub/src/models/densenet/densenet_image_classifier_test.py b/keras_hub/src/models/densenet/densenet_image_classifier_test.py index b4bb19d35a..da3fb20d1b 100644 --- a/keras_hub/src/models/densenet/densenet_image_classifier_test.py +++ b/keras_hub/src/models/densenet/densenet_image_classifier_test.py @@ -28,7 +28,6 @@ def setUp(self): self.labels = [0, 3] self.backbone = DenseNetBackbone( stackwise_num_repeats=[6, 12, 24, 16], - include_rescaling=True, compression_ratio=0.5, growth_rate=32, image_shape=(224, 224, 3), diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 2cb7a82f8b..405ea7bce0 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -67,8 +67,6 @@ class EfficientNetBackbone(FeaturePyramidBackbone): MBConvBlock, but instead of using a depthwise convolution and a 1x1 output convolution blocks fused blocks use a single 3x3 convolution block. - include_rescaling: bool, whether to rescale the inputs. If set to - True, inputs will be passed through a `Rescaling(1/255.0)` layer. min_depth: integer, minimum number of filters. Can be None and ignored if use_depth_divisor_as_min_depth is set to True. 
include_initial_padding: bool, whether to include initial zero padding @@ -96,7 +94,6 @@ class EfficientNetBackbone(FeaturePyramidBackbone): stackwise_block_types=[["fused"] * 3 + ["unfused"] * 3], width_coefficient=1.0, depth_coefficient=1.0, - include_rescaling=False, ) images = np.ones((1, 256, 256, 3)) outputs = efficientnet.predict(images) @@ -116,7 +113,6 @@ def __init__( stackwise_squeeze_and_excite_ratios, stackwise_strides, stackwise_block_types, - include_rescaling=True, dropout=0.2, depth_divisor=8, min_depth=8, @@ -129,14 +125,9 @@ def __init__( batch_norm_momentum=0.9, **kwargs, ): - img_input = keras.layers.Input(shape=input_shape) - - x = img_input - - if include_rescaling: - # Use common rescaling strategy across keras - x = keras.layers.Rescaling(scale=1.0 / 255.0)(x) + image_input = keras.layers.Input(shape=input_shape) + x = image_input # Intermediate result. if include_initial_padding: x = keras.layers.ZeroPadding2D( padding=self._correct_pad_downsample(x, 3), @@ -282,10 +273,9 @@ def __init__( curr_pyramid_level += 1 # Create model. - super().__init__(inputs=img_input, outputs=x, **kwargs) + super().__init__(inputs=image_input, outputs=x, **kwargs) # === Config === - self.include_rescaling = include_rescaling self.width_coefficient = width_coefficient self.depth_coefficient = depth_coefficient self.dropout = dropout @@ -313,7 +303,6 @@ def get_config(self): config = super().get_config() config.update( { - "include_rescaling": self.include_rescaling, "width_coefficient": self.width_coefficient, "depth_coefficient": self.depth_coefficient, "dropout": self.dropout, diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py b/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py index aab9f6dc69..918bc8087d 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py @@ -42,7 +42,6 @@ def setUp(self): "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, "width_coefficient": 1.0, "depth_coefficient": 1.0, - "include_rescaling": False, } self.input_data = keras.ops.ones(shape=(8, 224, 224, 3)) @@ -86,7 +85,6 @@ def test_valid_call_original_v1(self): ], "width_coefficient": 1.0, "depth_coefficient": 1.0, - "include_rescaling": False, "stackwise_block_types": ["v1"] * 7, "min_depth": None, "include_initial_padding": True, @@ -98,12 +96,6 @@ def test_valid_call_original_v1(self): model = EfficientNetBackbone(**original_v1_kwargs) model(self.input_data) - def test_valid_call_with_rescaling(self): - test_kwargs = self.init_kwargs.copy() - test_kwargs["include_rescaling"] = True - model = EfficientNetBackbone(**test_kwargs) - model(self.input_data) - def test_feature_pyramid_outputs(self): backbone = EfficientNetBackbone(**self.init_kwargs) model = keras.Model( diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py b/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py index 5127bd357b..6986be7c45 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py @@ -36,7 +36,6 @@ def __init__( end_value, patch_sizes, strides, - include_rescaling=True, image_shape=(None, None, 3), hidden_dims=None, **kwargs, @@ -60,9 +59,6 @@ def __init__( value projections. If set to > 1, a `Conv2D` layer is used to reduce the length of the sequence. end_value: The end value of the sequence. - include_rescaling: bool, whether to rescale the inputs. 
If set - to `True`, inputs will be passed through a `Rescaling(1/255.0)` - layer. Defaults to `True`. image_shape: optional shape tuple, defaults to (None, None, 3). hidden_dims: the embedding dims per hierarchical layer, used as the levels of the feature pyramid. @@ -123,11 +119,7 @@ def __init__( # === Functional Model === image_input = keras.layers.Input(shape=image_shape) - x = image_input - - if include_rescaling: - x = keras.layers.Rescaling(scale=1 / 255)(x) - + x = image_input # Intermediate result. pyramid_outputs = {} for i in range(num_layers): # Compute new height/width after the `proj` @@ -151,7 +143,6 @@ def __init__( # === Config === self.depths = depths - self.include_rescaling = include_rescaling self.image_shape = image_shape self.hidden_dims = hidden_dims self.pyramid_outputs = pyramid_outputs @@ -167,7 +158,6 @@ def get_config(self): config.update( { "depths": self.depths, - "include_rescaling": self.include_rescaling, "hidden_dims": self.hidden_dims, "image_shape": self.image_shape, "num_layers": self.num_layers, diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_backbone_test.py b/keras_hub/src/models/mix_transformer/mix_transformer_backbone_test.py index 9cab12b7bb..bab58103b4 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_backbone_test.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_backbone_test.py @@ -25,7 +25,6 @@ class MiTBackboneTest(TestCase): def setUp(self): self.init_kwargs = { "depths": [2, 2], - "include_rescaling": True, "image_shape": (16, 16, 3), "hidden_dims": [4, 8], "num_layers": 2, diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py b/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py index c6ff3fba1e..7de8aea880 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py @@ -76,7 +76,6 @@ class MiTImageClassifier(ImageClassifier): backbone = keras_hub.models.MiTBackbone( stackwise_num_filters=[128, 256, 512, 1024], stackwise_depth=[3, 9, 9, 3], - include_rescaling=False, block_type="basic_block", image_shape = (224, 224, 3), ) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_classifier_test.py b/keras_hub/src/models/mix_transformer/mix_transformer_classifier_test.py index e17071229a..1d5d4ec444 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_classifier_test.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_classifier_test.py @@ -30,7 +30,6 @@ def setUp(self): self.labels = [0, 3] self.backbone = MiTBackbone( depths=[2, 2, 2, 2], - include_rescaling=True, image_shape=(16, 16, 3), hidden_dims=[4, 8], num_layers=2, diff --git a/keras_hub/src/models/mobilenet/mobilenet_backbone.py b/keras_hub/src/models/mobilenet/mobilenet_backbone.py index 27072ddf37..ff83364472 100644 --- a/keras_hub/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_hub/src/models/mobilenet/mobilenet_backbone.py @@ -54,9 +54,6 @@ class MobileNetBackbone(Backbone): model. 0 if dont want to add Squeeze and Excite layer. stackwise_activation: list of activation functions, for each inverted residual block in the model. - include_rescaling: bool, whether to rescale the inputs. If set to True, - inputs will be passed through a `Rescaling(scale=1 / 255)` - layer. image_shape: optional shape tuple, defaults to (224, 224, 3). depth_multiplier: float, controls the width of the network. 
- If `depth_multiplier` < 1.0, proportionally decreases the number @@ -92,7 +89,6 @@ class MobileNetBackbone(Backbone): stackwise_num_strides=[2, 2, 1], stackwise_se_ratio=[0.25, None, 0.25], stackwise_activation=["relu", "relu6", "hard_swish"], - include_rescaling=False, output_num_filters=1280, input_activation='hard_swish', output_activation='hard_swish', @@ -111,7 +107,6 @@ def __init__( stackwise_num_strides, stackwise_se_ratio, stackwise_activation, - include_rescaling, output_num_filters, inverted_res_block, image_shape=(224, 224, 3), @@ -126,12 +121,8 @@ def __init__( -1 if keras.config.image_data_format() == "channels_last" else 1 ) - inputs = keras.layers.Input(shape=image_shape) - x = inputs - - if include_rescaling: - x = keras.layers.Rescaling(scale=1 / 255)(x) - + image_input = keras.layers.Input(shape=image_shape) + x = image_input # Intermediate result. input_num_filters = adjust_channels(input_num_filters) x = keras.layers.Conv2D( input_num_filters, @@ -195,7 +186,7 @@ def __init__( )(x) x = keras.layers.Activation(output_activation)(x) - super().__init__(inputs=inputs, outputs=x, **kwargs) + super().__init__(inputs=image_input, outputs=x, **kwargs) # === Config === self.stackwise_expansion = stackwise_expansion @@ -204,7 +195,6 @@ def __init__( self.stackwise_num_strides = stackwise_num_strides self.stackwise_se_ratio = stackwise_se_ratio self.stackwise_activation = stackwise_activation - self.include_rescaling = include_rescaling self.depth_multiplier = depth_multiplier self.input_num_filters = input_num_filters self.output_num_filters = output_num_filters @@ -223,7 +213,6 @@ def get_config(self): "stackwise_num_strides": self.stackwise_num_strides, "stackwise_se_ratio": self.stackwise_se_ratio, "stackwise_activation": self.stackwise_activation, - "include_rescaling": self.include_rescaling, "image_shape": self.image_shape, "depth_multiplier": self.depth_multiplier, "input_num_filters": self.input_num_filters, diff --git a/keras_hub/src/models/mobilenet/mobilenet_backbone_test.py b/keras_hub/src/models/mobilenet/mobilenet_backbone_test.py index 32d1c27c47..cf49194c5c 100644 --- a/keras_hub/src/models/mobilenet/mobilenet_backbone_test.py +++ b/keras_hub/src/models/mobilenet/mobilenet_backbone_test.py @@ -28,7 +28,6 @@ def setUp(self): "stackwise_num_strides": [2, 2, 1], "stackwise_se_ratio": [0.25, None, 0.25], "stackwise_activation": ["relu", "relu", "hard_swish"], - "include_rescaling": False, "output_num_filters": 1280, "input_activation": "hard_swish", "output_activation": "hard_swish", diff --git a/keras_hub/src/models/mobilenet/mobilenet_image_classifier.py b/keras_hub/src/models/mobilenet/mobilenet_image_classifier.py index b744e7c40f..407feac11a 100644 --- a/keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +++ b/keras_hub/src/models/mobilenet/mobilenet_image_classifier.py @@ -56,7 +56,6 @@ class MobileNetImageClassifier(ImageClassifier): stackwise_stride = [2, 2, 1], stackwise_se_ratio = [ 0.25, None, 0.25], stackwise_activation = ["relu", "relu", "hard_swish"], - include_rescaling = False, output_filter=1280, activation="hard_swish", inverted_res_block=True, diff --git a/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py index 0fbcca7675..b16d1b92af 100644 --- a/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py +++ b/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py @@ -33,7 +33,6 @@ def setUp(self): stackwise_num_strides=[2, 2, 1], 
stackwise_se_ratio=[0.25, None, 0.25], stackwise_activation=["relu", "relu", "hard_swish"], - include_rescaling=False, output_num_filters=1280, input_activation="hard_swish", output_activation="hard_swish", diff --git a/keras_hub/src/models/pali_gemma/pali_gemma_vit.py b/keras_hub/src/models/pali_gemma/pali_gemma_vit.py index e9da150c08..c47507703a 100644 --- a/keras_hub/src/models/pali_gemma/pali_gemma_vit.py +++ b/keras_hub/src/models/pali_gemma/pali_gemma_vit.py @@ -476,6 +476,9 @@ def __init__( shape=(image_size, image_size, 3), name="images" ) x = image_input # Intermediate result. + # TODO we have moved this rescaling to preprocessing layers for most + # models. We should consider removing it here, though it would break + # compatibility. if include_rescaling: rescaling = keras.layers.Rescaling( scale=1.0 / 127.5, offset=-1.0, name="rescaling" diff --git a/keras_hub/src/models/resnet/resnet_backbone.py b/keras_hub/src/models/resnet/resnet_backbone.py index 7f585ba1f2..638ccb8079 100644 --- a/keras_hub/src/models/resnet/resnet_backbone.py +++ b/keras_hub/src/models/resnet/resnet_backbone.py @@ -44,9 +44,6 @@ class ResNetBackbone(FeaturePyramidBackbone): additional pooling operation rather than performing downsampling within the convolutional layers themselves. - Note that `ResNetBackbone` expects the inputs to be images with a value - range of `[0, 255]` when `include_rescaling=True`. - Args: input_conv_filters: list of ints. The number of filters of the initial convolution(s). @@ -65,9 +62,6 @@ class ResNetBackbone(FeaturePyramidBackbone): variants. use_pre_activation: boolean. Whether to use pre-activation or not. `True` for ResNetV2, `False` for ResNet. - include_rescaling: boolean. If `True`, rescale the input using - `Rescaling` and `Normalization` layers. If `False`, do nothing. - Defaults to `True`. image_shape: tuple. The input shape without the batch size. Defaults to `(None, None, 3)`. pooling: `None` or str. Pooling mode for feature extraction. Defaults @@ -124,7 +118,6 @@ def __init__( stackwise_num_strides, block_type, use_pre_activation=False, - include_rescaling=True, image_shape=(None, None, 3), data_format=None, dtype=None, @@ -170,18 +163,7 @@ def __init__( # === Functional Model === image_input = layers.Input(shape=image_shape) - if include_rescaling: - x = layers.Rescaling(scale=1 / 255.0, dtype=dtype)(image_input) - x = layers.Normalization( - axis=bn_axis, - mean=(0.485, 0.456, 0.406), - variance=(0.229**2, 0.224**2, 0.225**2), - dtype=dtype, - name="normalization", - )(x) - else: - x = image_input - + x = image_input # Intermediate result. # The padding between torch and tensorflow/jax differs when `strides>1`. # Therefore, we need to manually pad the tensor. 
x = layers.ZeroPadding2D( @@ -299,7 +281,6 @@ def __init__( self.stackwise_num_strides = stackwise_num_strides self.block_type = block_type self.use_pre_activation = use_pre_activation - self.include_rescaling = include_rescaling self.image_shape = image_shape self.pyramid_outputs = pyramid_outputs self.data_format = data_format @@ -315,7 +296,6 @@ def get_config(self): "stackwise_num_strides": self.stackwise_num_strides, "block_type": self.block_type, "use_pre_activation": self.use_pre_activation, - "include_rescaling": self.include_rescaling, "image_shape": self.image_shape, } ) diff --git a/keras_hub/src/models/resnet/resnet_image_classifier.py b/keras_hub/src/models/resnet/resnet_image_classifier.py index a7456cb85b..4440ef145c 100644 --- a/keras_hub/src/models/resnet/resnet_image_classifier.py +++ b/keras_hub/src/models/resnet/resnet_image_classifier.py @@ -85,7 +85,6 @@ class ResNetImageClassifier(ImageClassifier): stackwise_num_strides=[1, 2, 2], block_type="basic_block", use_pre_activation=True, - include_rescaling=False, pooling="avg", ) classifier = keras_hub.models.ResNetImageClassifier( diff --git a/keras_hub/src/models/resnet/resnet_image_classifier_test.py b/keras_hub/src/models/resnet/resnet_image_classifier_test.py index d9de3719ac..92c689c097 100644 --- a/keras_hub/src/models/resnet/resnet_image_classifier_test.py +++ b/keras_hub/src/models/resnet/resnet_image_classifier_test.py @@ -34,7 +34,6 @@ def setUp(self): block_type="basic_block", use_pre_activation=True, image_shape=(16, 16, 3), - include_rescaling=False, ) self.init_kwargs = { "backbone": self.backbone, @@ -62,7 +61,7 @@ def test_head_dtype(self): @pytest.mark.large def test_smallest_preset(self): # Test that our forward pass is stable! - image_batch = self.load_test_image()[None, ...] + image_batch = self.load_test_image()[None, ...] / 255. 
self.run_preset_test( cls=ResNetImageClassifier, preset="resnet_18_imagenet", diff --git a/keras_hub/src/models/resnet/resnet_presets.py b/keras_hub/src/models/resnet/resnet_presets.py index 99e448f24d..7264558a7a 100644 --- a/keras_hub/src/models/resnet/resnet_presets.py +++ b/keras_hub/src/models/resnet/resnet_presets.py @@ -25,7 +25,7 @@ "path": "resnet", "model_card": "https://arxiv.org/abs/2110.00476", }, - "kaggle_handle": "kaggle://kerashub/resnetv1/keras/resnet_18_imagenet/2", + "kaggle_handle": "kaggle://kerashub/resnetv1/keras/resnet_18_imagenet/3", }, "resnet_50_imagenet": { "metadata": { @@ -38,7 +38,7 @@ "path": "resnet", "model_card": "https://arxiv.org/abs/2110.00476", }, - "kaggle_handle": "kaggle://kerashub/resnetv1/keras/resnet_50_imagenet/2", + "kaggle_handle": "kaggle://kerashub/resnetv1/keras/resnet_50_imagenet/3", }, "resnet_101_imagenet": { "metadata": { @@ -51,7 +51,7 @@ "path": "resnet", "model_card": "https://arxiv.org/abs/2110.00476", }, - "kaggle_handle": "kaggle://kerashub/resnetv1/keras/resnet_101_imagenet/2", + "kaggle_handle": "kaggle://kerashub/resnetv1/keras/resnet_101_imagenet/3", }, "resnet_152_imagenet": { "metadata": { @@ -64,7 +64,7 @@ "path": "resnet", "model_card": "https://arxiv.org/abs/2110.00476", }, - "kaggle_handle": "kaggle://kerashub/resnetv1/keras/resnet_152_imagenet/2", + "kaggle_handle": "kaggle://kerashub/resnetv1/keras/resnet_152_imagenet/3", }, "resnet_v2_50_imagenet": { "metadata": { @@ -77,7 +77,7 @@ "path": "resnet", "model_card": "https://arxiv.org/abs/2110.00476", }, - "kaggle_handle": "kaggle://kerashub/resnetv2/keras/resnet_v2_50_imagenet/2", + "kaggle_handle": "kaggle://kerashub/resnetv2/keras/resnet_v2_50_imagenet/3", }, "resnet_v2_101_imagenet": { "metadata": { @@ -90,6 +90,6 @@ "path": "resnet", "model_card": "https://arxiv.org/abs/2110.00476", }, - "kaggle_handle": "kaggle://kerashub/resnetv2/keras/resnet_v2_101_imagenet/2", + "kaggle_handle": "kaggle://kerashub/resnetv2/keras/resnet_v2_101_imagenet/3", }, } diff --git a/keras_hub/src/models/vgg/vgg_backbone.py b/keras_hub/src/models/vgg/vgg_backbone.py index 541b3600ef..771c45ce5e 100644 --- a/keras_hub/src/models/vgg/vgg_backbone.py +++ b/keras_hub/src/models/vgg/vgg_backbone.py @@ -33,8 +33,6 @@ class VGGBackbone(Backbone): stackwise_num_filters: list of ints, filter size for convolutional blocks per VGG block. For both VGG16 and VGG19 this is [ 64, 128, 256, 512, 512]. - include_rescaling: bool, whether to rescale the inputs. If set to - True, inputs will be passed through a `Rescaling(1/255.0)` layer. image_shape: tuple, optional shape tuple, defaults to (224, 224, 3). pooling: bool, Optional pooling mode for feature extraction when `include_top` is `False`. 
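As background for the preset version bumps and the `/ 255.` added to the ResNet classifier test above (a sketch assuming a 224x224 input, not part of this patch): the ImageNet normalization that `ResNetBackbone` previously applied in-graph through `Rescaling` and `Normalization` layers is now expressed purely through `ResizingImageConverter` arguments, with `scale=1 / 255.0`, `mean`, and `variance=std**2` reproducing `(x / 255 - mean) / std`.

```python
import keras
import numpy as np

from keras_hub.src.layers.preprocessing.resizing_image_converter import (
    ResizingImageConverter,
)

# The constants that were hard-coded in ResNetBackbone's Normalization layer.
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

converter = ResizingImageConverter(
    height=224,
    width=224,
    scale=1 / 255.0,                    # Replaces Rescaling(1 / 255.0).
    mean=mean,                          # Replaces the Normalization layer,
    variance=tuple(s**2 for s in std),  # which was parameterized by variance.
)

image = np.random.uniform(0, 255, size=(224, 224, 3)).astype("float32")
got = keras.ops.convert_to_numpy(converter(image))
want = (image / 255.0 - np.array(mean)) / np.array(std)
np.testing.assert_allclose(got, want, atol=1e-3)
```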
@@ -61,7 +59,6 @@ class VGGBackbone(Backbone): stackwise_num_repeats = [2, 2, 3, 3, 3], stackwise_num_filters = [64, 128, 256, 512, 512], image_shape = (224, 224, 3), - include_rescaling = False, pooling = "avg", ) model(input_data) @@ -72,7 +69,6 @@ def __init__( self, stackwise_num_repeats, stackwise_num_filters, - include_rescaling, image_shape=(224, 224, 3), pooling="avg", **kwargs, @@ -82,8 +78,6 @@ def __init__( img_input = keras.layers.Input(shape=image_shape) x = img_input - if include_rescaling: - x = layers.Rescaling(scale=1 / 255.0)(x) for stack_index in range(len(stackwise_num_repeats) - 1): x = apply_vgg_block( x=x, @@ -105,7 +99,6 @@ def __init__( # === Config === self.stackwise_num_repeats = stackwise_num_repeats self.stackwise_num_filters = stackwise_num_filters - self.include_rescaling = include_rescaling self.image_shape = image_shape self.pooling = pooling @@ -113,7 +106,6 @@ def get_config(self): return { "stackwise_num_repeats": self.stackwise_num_repeats, "stackwise_num_filters": self.stackwise_num_filters, - "include_rescaling": self.include_rescaling, "image_shape": self.image_shape, "pooling": self.pooling, } diff --git a/keras_hub/src/models/vgg/vgg_backbone_test.py b/keras_hub/src/models/vgg/vgg_backbone_test.py index 38f7d03606..76b279dc73 100644 --- a/keras_hub/src/models/vgg/vgg_backbone_test.py +++ b/keras_hub/src/models/vgg/vgg_backbone_test.py @@ -25,7 +25,6 @@ def setUp(self): "stackwise_num_repeats": [2, 3, 3], "stackwise_num_filters": [8, 64, 64], "image_shape": (16, 16, 3), - "include_rescaling": False, "pooling": "avg", } self.input_data = np.ones((2, 16, 16, 3), dtype="float32") diff --git a/keras_hub/src/models/vgg/vgg_image_classifier.py b/keras_hub/src/models/vgg/vgg_image_classifier.py index 6b9733c250..2e3c42285a 100644 --- a/keras_hub/src/models/vgg/vgg_image_classifier.py +++ b/keras_hub/src/models/vgg/vgg_image_classifier.py @@ -66,7 +66,6 @@ class VGGImageClassifier(ImageClassifier): stackwise_num_repeats = [2, 2, 3, 3, 3], stackwise_num_filters = [64, 128, 256, 512, 512], image_shape = (224, 224, 3), - include_rescaling = False, pooling = "avg", ) classifier = keras_hub.models.VGGImageClassifier( diff --git a/keras_hub/src/models/vgg/vgg_image_classifier_test.py b/keras_hub/src/models/vgg/vgg_image_classifier_test.py index 83ec811bbf..b62e56ae99 100644 --- a/keras_hub/src/models/vgg/vgg_image_classifier_test.py +++ b/keras_hub/src/models/vgg/vgg_image_classifier_test.py @@ -28,7 +28,6 @@ def setUp(self): stackwise_num_repeats=[2, 4, 4], stackwise_num_filters=[2, 16, 16], image_shape=(4, 4, 3), - include_rescaling=False, pooling="max", ) self.init_kwargs = { diff --git a/keras_hub/src/models/vit_det/vit_det_backbone.py b/keras_hub/src/models/vit_det/vit_det_backbone.py index 0aed62fd11..b634f0936e 100644 --- a/keras_hub/src/models/vit_det/vit_det_backbone.py +++ b/keras_hub/src/models/vit_det/vit_det_backbone.py @@ -46,9 +46,6 @@ class ViTDetBackbone(Backbone): global attention. image_shape (tuple[int], optional): The size of the input image in `(H, W, C)` format. Defaults to `(1024, 1024, 3)`. - include_rescaling (bool, optional): Whether to rescale the inputs. If - set to `True`, inputs will be passed through a - `Rescaling(1/255.0)` layer. Defaults to `False`. patch_size (int, optional): the patch size to be supplied to the Patching layer to turn input images into a flattened sequence of patches. Defaults to `16`. 
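One behavioral note on `ViTDetBackbone` (a hedged sketch; the shapes below are placeholders): the hunk that follows removes the optional `Rescaling(1 / 255.0)` but keeps the ImageNet mean/std normalization inside the backbone, so callers are now responsible for supplying values already in the `[0, 1]` range, whether through an image converter or a manual divide.

```python
import numpy as np

# Constants retained inside vit_det_backbone.py after this patch.
mean = np.array([0.485, 0.456, 0.406], dtype="float32")
std = np.array([0.229, 0.224, 0.225], dtype="float32")

raw = np.random.randint(0, 256, size=(1, 1024, 1024, 3)).astype("float32")
x = raw / 255.0       # Caller-side rescale (formerly the include_rescaling path).
x = (x - mean) / std  # Normalization the backbone still applies internally.
print(x.mean(), x.std())
```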
@@ -96,7 +93,6 @@ def __init__( intermediate_dim, num_heads, global_attention_layer_indices, - include_rescaling=True, image_shape=(1024, 1024, 3), patch_size=16, num_output_channels=256, @@ -123,9 +119,6 @@ def __init__( ) img_size = img_input.shape[-3] x = img_input - if include_rescaling: - # Use common rescaling strategy across keras_cv - x = keras.layers.Rescaling(1.0 / 255.0)(x) # VITDet scales inputs based on the standard ImageNet mean/stddev. x = (x - ops.array([0.485, 0.456, 0.406], dtype=x.dtype)) / ( ops.array([0.229, 0.224, 0.225], dtype=x.dtype) @@ -179,14 +172,12 @@ def __init__( self.window_size = window_size self.global_attention_layer_indices = global_attention_layer_indices self.layer_norm_epsilon = layer_norm_epsilon - self.include_rescaling = include_rescaling def get_config(self): config = super().get_config() config.update( { "image_shape": self.image_shape, - "include_rescaling": self.include_rescaling, "patch_size": self.patch_size, "hidden_size": self.hidden_size, "num_layers": self.num_layers, diff --git a/keras_hub/src/models/vit_det/vit_det_backbone_test.py b/keras_hub/src/models/vit_det/vit_det_backbone_test.py index d8c1b2d24c..5bd3e0622b 100644 --- a/keras_hub/src/models/vit_det/vit_det_backbone_test.py +++ b/keras_hub/src/models/vit_det/vit_det_backbone_test.py @@ -22,7 +22,6 @@ class ViTDetBackboneTest(TestCase): def setUp(self): self.init_kwargs = { - "include_rescaling": True, "image_shape": (16, 16, 3), "patch_size": 2, "hidden_size": 4, diff --git a/keras_hub/src/utils/timm/convert_resnet.py b/keras_hub/src/utils/timm/convert_resnet.py index 8042d5f5f1..f5dc10e822 100644 --- a/keras_hub/src/utils/timm/convert_resnet.py +++ b/keras_hub/src/utils/timm/convert_resnet.py @@ -151,14 +151,6 @@ def port_batch_normalization(keras_layer_name, hf_weight_prefix): if version == "v2": port_batch_normalization("post_bn", "norm") - # Rebuild normalization layer with pretrained mean & std - mean = timm_config["pretrained_cfg"]["mean"] - std = timm_config["pretrained_cfg"]["std"] - normalization_layer = backbone.get_layer("normalization") - normalization_layer.input_mean = mean - normalization_layer.input_variance = [s**2 for s in std] - normalization_layer.build(normalization_layer._build_input_shape) - def convert_head(task, loader, timm_config): v2 = "resnetv2_" in timm_config["architecture"] diff --git a/keras_hub/src/utils/timm/preset_loader.py b/keras_hub/src/utils/timm/preset_loader.py index 123cdf9674..0993a9a5d6 100644 --- a/keras_hub/src/utils/timm/preset_loader.py +++ b/keras_hub/src/utils/timm/preset_loader.py @@ -62,5 +62,20 @@ def load_image_converter(self, cls, **kwargs): pretrained_cfg = self.config.get("pretrained_cfg", None) if not pretrained_cfg or "input_size" not in pretrained_cfg: return None + # This assumes the same basic setup for all timm preprocessing, and that + # all our image conversion will be via a `ResizingImageConverter. We may + # need to extend this as we cover more model types. input_size = pretrained_cfg["input_size"] - return cls(width=input_size[1], height=input_size[2]) + mean = pretrained_cfg["mean"] + variance = [s**2 for s in pretrained_cfg["std"]] + interpolation = pretrained_cfg["interpolation"] + if interpolation not in ("bilinear", "nearest", "bicubic"): + interpolation = "bilinear" # Unsupported interpolation type. 
+ return cls( + width=input_size[1], + height=input_size[2], + scale=1 / 255.0, + mean=mean, + variance=variance, + interpolation=interpolation, + ) diff --git a/tools/checkpoint_conversion/convert_resnet_checkpoints.py b/tools/checkpoint_conversion/convert_resnet_checkpoints.py index eae4554256..530d285f5b 100644 --- a/tools/checkpoint_conversion/convert_resnet_checkpoints.py +++ b/tools/checkpoint_conversion/convert_resnet_checkpoints.py @@ -75,21 +75,36 @@ def validate_output(keras_model, timm_model): image = PIL.Image.open(file) batch = np.array([image]) - # Call with Timm. - timm_batch = keras_model.preprocessor(batch) - timm_batch = keras.ops.transpose(timm_batch, axes=(0, 3, 1, 2)) / 255.0 + # Preprocess with Timm. + data_config = timm.data.resolve_model_data_config(timm_model) + data_config["crop_pct"] = 1.0 # Stop timm from cropping. + transforms = timm.data.create_transform(**data_config, is_training=False) + timm_preprocessed = transforms(image) + timm_preprocessed = keras.ops.transpose(timm_preprocessed, axes=(1, 2, 0)) + timm_preprocessed = keras.ops.expand_dims(timm_preprocessed, 0) + + # Preprocess with Keras. + keras_preprocessed = keras_model.preprocessor(batch) + + # Call with Timm. Use the keras preprocessed image so we can keep modeling + # and preprocessing comparisons independent. + timm_batch = keras.ops.transpose(keras_preprocessed, axes=(0, 3, 1, 2)) timm_batch = torch.from_numpy(np.array(timm_batch)) timm_outputs = timm_model(timm_batch).detach().numpy() timm_label = np.argmax(timm_outputs[0]) + # Call with Keras. keras_outputs = keras_model.predict(batch) keras_label = np.argmax(keras_outputs[0]) print("🔶 Keras output:", keras_outputs[0, :10]) print("🔶 TIMM output:", timm_outputs[0, :10]) - print("🔶 Difference:", np.mean(np.abs(keras_outputs - timm_outputs))) print("🔶 Keras label:", keras_label) print("🔶 TIMM label:", timm_label) + modeling_diff = np.mean(np.abs(keras_outputs - timm_outputs)) + print("🔶 Modeling difference:", modeling_diff) + preprocessing_diff = np.mean(np.abs(keras_preprocessed - timm_preprocessed)) + print("🔶 Preprocessing difference:", preprocessing_diff) def main(_):