Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Differential Binarization model from PaddleOCR to Keras3 #1739

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
3 changes: 3 additions & 0 deletions keras_hub/api/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
from keras_hub.src.models.densenet.densenet_image_converter import (
DenseNetImageConverter,
)
from keras_hub.src.models.differential_binarization.differential_binarization_image_converter import (
DifferentialBinarizationImageConverter,
)
from keras_hub.src.models.efficientnet.efficientnet_image_converter import (
EfficientNetImageConverter,
)
Expand Down
9 changes: 9 additions & 0 deletions keras_hub/api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,15 @@
from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import (
DenseNetImageClassifierPreprocessor,
)
from keras_hub.src.models.differential_binarization.differential_binarization import (
DifferentialBinarization,
)
from keras_hub.src.models.differential_binarization.differential_binarization_backbone import (
DifferentialBinarizationBackbone,
)
from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import (
DifferentialBinarizationPreprocessor,
)
from keras_hub.src.models.distil_bert.distil_bert_backbone import (
DistilBertBackbone,
)
Expand Down
9 changes: 9 additions & 0 deletions keras_hub/src/models/differential_binarization/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from keras_hub.src.models.differential_binarization.differential_binarization_backbone import (
DifferentialBinarizationBackbone,
)
from keras_hub.src.models.differential_binarization.differential_binarization_presets import (
backbone_presets,
)
from keras_hub.src.utils.preset_utils import register_presets

# Associate the presets declared in `differential_binarization_presets` with
# the backbone class at package import time.
# NOTE(review): presumably this is what lets `from_preset` resolve these
# presets for `DifferentialBinarizationBackbone` — confirm in `preset_utils`.
register_presets(backbone_presets, DifferentialBinarizationBackbone)
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import math

import keras
from keras import layers

from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.models.differential_binarization.differential_binarization_backbone import (
DifferentialBinarizationBackbone,
)
from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import (
DifferentialBinarizationPreprocessor,
)
from keras_hub.src.models.differential_binarization.losses import DBLoss
from keras_hub.src.models.image_segmenter import ImageSegmenter


@keras_hub_export("keras_hub.models.DifferentialBinarization")
class DifferentialBinarization(ImageSegmenter):
    """Scene text detection task based on Differentiable Binarization.

    A Keras model implementing the architecture described in
    [Real-time Scene Text Detection with Differentiable Binarization](
    https://arxiv.org/abs/1911.08947).

    Two identically structured heads (see `diffbin_head`) are applied to the
    backbone output to predict a probability map and a threshold map. Both
    are combined by `step_function` into an approximate binary map. The model
    output is the concatenation, along the last axis, of
    `[probability_maps, threshold_maps, binary_maps]`.

    Args:
        backbone: A `keras_hub.models.DifferentialBinarizationBackbone`
            instance.
        head_kernel_list: list of three ints. Kernel sizes of the three
            convolution layers used in each of the probability and
            threshold heads. Defaults to `[3, 2, 2]` when `None`.
        step_function_k: float. `k` parameter used within the differential
            binarization step function.
        preprocessor: `None`, a `keras_hub.models.Preprocessor` instance,
            a `keras.Layer` instance, or a callable. If `None` no
            preprocessing will be applied to the inputs.

    Examples:
    ```python
    input_data = np.ones(shape=(8, 224, 224, 3))

    image_encoder = keras_hub.models.ResNetBackbone.from_preset(
        "resnet_vd_50_imagenet"
    )
    backbone = keras_hub.models.DifferentialBinarizationBackbone(image_encoder)
    detector = keras_hub.models.DifferentialBinarization(
        backbone=backbone
    )

    detector(input_data)
    ```
    """

    backbone_cls = DifferentialBinarizationBackbone
    preprocessor_cls = DifferentialBinarizationPreprocessor

    def __init__(
        self,
        backbone,
        head_kernel_list=None,
        step_function_k=50.0,
        preprocessor=None,
        **kwargs,
    ):
        # Avoid a mutable default argument: a list default would be shared by
        # every instance and exposed through `self.head_kernel_list`. Copying
        # also decouples the stored value from the caller's own list.
        if head_kernel_list is None:
            head_kernel_list = [3, 2, 2]
        else:
            head_kernel_list = list(head_kernel_list)

        # === Functional model ===
        inputs = backbone.input
        x = backbone(inputs)
        probability_maps = diffbin_head(
            x,
            in_channels=backbone.fpn_channels,
            kernel_list=head_kernel_list,
            name="head_prob",
        )
        threshold_maps = diffbin_head(
            x,
            in_channels=backbone.fpn_channels,
            kernel_list=head_kernel_list,
            name="head_thresh",
        )
        binary_maps = step_function(
            probability_maps, threshold_maps, k=step_function_k
        )
        outputs = layers.Concatenate(axis=-1)(
            [probability_maps, threshold_maps, binary_maps]
        )

        super().__init__(inputs=inputs, outputs=outputs, **kwargs)

        # === Config ===
        self.backbone = backbone
        self.head_kernel_list = head_kernel_list
        self.step_function_k = step_function_k
        self.preprocessor = preprocessor

    def compile(
        self,
        optimizer="auto",
        loss="auto",
        **kwargs,
    ):
        """Configures the `DifferentialBinarization` task for training.

        `DifferentialBinarization` extends the default compilation signature of
        `keras.Model.compile` with defaults for `optimizer` and `loss`. To
        override these defaults, pass any value to these arguments during
        compilation.

        Args:
            optimizer: `"auto"`, an optimizer name, or a `keras.Optimizer`
                instance. Defaults to `"auto"`, which uses SGD with
                `learning_rate=0.007`, `weight_decay=0.0001` and
                `momentum=0.9`. See `keras.Model.compile` and
                `keras.optimizers` for more info on possible `optimizer`
                values.
            loss: `"auto"`, a loss name, or a `keras.losses.Loss` instance.
                Defaults to `"auto"`, in which case a `DBLoss` instance is
                used. See `keras.Model.compile` and `keras.losses` for more
                info on possible `loss` values.
            **kwargs: See `keras.Model.compile` for a full list of arguments
                supported by the compile method.
        """
        if optimizer == "auto":
            optimizer = keras.optimizers.SGD(
                learning_rate=0.007, weight_decay=0.0001, momentum=0.9
            )
        if loss == "auto":
            loss = DBLoss()
        super().compile(
            optimizer=optimizer,
            loss=loss,
            **kwargs,
        )

    def get_config(self):
        """Returns the task config, extended with the head hyperparameters."""
        # Backbone serialized in `super`
        config = super().get_config()
        config.update(
            {
                "head_kernel_list": self.head_kernel_list,
                "step_function_k": self.step_function_k,
            }
        )
        return config


def step_function(x, y, k):
    """Differentiable approximation of the binarization step `x > y`.

    Computes `1 / (1 + exp(-k * (x - y)))`, i.e. `sigmoid(k * (x - y))`.

    Args:
        x: tensor compared against the threshold (the probability maps at
            the call site in this file).
        y: tensor of thresholds (the threshold maps at the call site in
            this file).
        k: float. Scale applied to `x - y` before the sigmoid.

    Returns:
        A tensor with values in `[0, 1]`.
    """
    # Use the backend sigmoid rather than spelling out the formula by hand:
    # with a large `k`, `exp(-k * (x - y))` can overflow in low-precision
    # dtypes, while the built-in op is implemented to be numerically stable.
    return keras.ops.sigmoid(k * (x - y))


def diffbin_head(inputs, in_channels, kernel_list, name):
    """Builds one prediction head on top of the fused backbone features.

    The head is a bias-free `Conv2D` followed by two stride-2
    `Conv2DTranspose` layers. The first two convolutions are each followed
    by batch normalization and ReLU; the last one has a single filter and a
    sigmoid activation.

    Args:
        inputs: feature tensor the head is applied to.
        in_channels: int. The hidden layers of the head use
            `in_channels // 4` filters.
        kernel_list: sequence of three ints, the kernel sizes of the three
            convolution layers, in order.
        name: str. Prefix used for the names of all layers in the head.

    Returns:
        The output tensor of the final sigmoid-activated transposed
        convolution.
    """
    hidden_filters = in_channels // 4
    # Bias values are drawn uniformly from [-bound, bound].
    bias_bound = 1.0 / math.sqrt(hidden_filters * 1.0)

    def make_bias_initializer():
        # A fresh initializer object per layer, as in a hand-unrolled version.
        return keras.initializers.RandomUniform(
            minval=-bias_bound,
            maxval=bias_bound,
        )

    def make_batch_norm(layer_name):
        return layers.BatchNormalization(
            beta_initializer=keras.initializers.Constant(1e-4),
            gamma_initializer=keras.initializers.Constant(1.0),
            name=layer_name,
        )

    # Stage 0: bias-free convolution + BN + ReLU.
    conv0 = layers.Conv2D(
        hidden_filters,
        kernel_size=kernel_list[0],
        padding="same",
        use_bias=False,
        name=f"{name}_conv0_weights",
    )
    features = conv0(inputs)
    features = make_batch_norm(f"{name}_conv0_bn")(features)
    features = layers.ReLU(name=f"{name}_conv0_relu")(features)

    # Stage 1: stride-2 transposed convolution + BN + ReLU.
    conv1 = layers.Conv2DTranspose(
        hidden_filters,
        kernel_size=kernel_list[1],
        strides=2,
        padding="valid",
        bias_initializer=make_bias_initializer(),
        name=f"{name}_conv1_weights",
    )
    features = conv1(features)
    features = make_batch_norm(f"{name}_conv1_bn")(features)
    features = layers.ReLU(name=f"{name}_conv1_relu")(features)

    # Stage 2: stride-2 transposed convolution to a single sigmoid channel.
    conv2 = layers.Conv2DTranspose(
        1,
        kernel_size=kernel_list[2],
        strides=2,
        padding="valid",
        activation="sigmoid",
        bias_initializer=make_bias_initializer(),
        name=f"{name}_conv2_weights",
    )
    return conv2(features)
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import keras
from keras import layers

from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.models.backbone import Backbone


@keras_hub_export("keras_hub.models.DifferentialBinarizationBackbone")
class DifferentialBinarizationBackbone(Backbone):
    """Backbone of the Differentiable Binarization text detector.

    A Keras model implementing the feature extraction part of the
    architecture described in
    [Real-time Scene Text Detection with Differentiable Binarization](
    https://arxiv.org/abs/1911.08947).

    This class wraps an image encoder and adds the feature pyramid network
    built by `diffbin_fpn_model` on top of the encoder's pyramid outputs.
    The model output is the fused feature map produced by that network.

    Args:
        image_encoder: A `keras_hub.models.ResNetBackbone` instance. It must
            expose `input` and a `pyramid_outputs` mapping with the keys
            `"P2"`, `"P3"`, `"P4"` and `"P5"`.
        fpn_channels: int. The number of channels used inside the feature
            pyramid network. Each of the four pyramid levels contributes
            `fpn_channels // 4` channels to the fused output. Defaults to
            256.
    """

    def __init__(
        self,
        image_encoder,
        fpn_channels=256,
        **kwargs,
    ):
        # === Functional model ===
        inputs = image_encoder.input
        x = image_encoder.pyramid_outputs
        x = diffbin_fpn_model(x, out_channels=fpn_channels)

        super().__init__(inputs=inputs, outputs=x, **kwargs)

        # === Config ===
        self.image_encoder = image_encoder
        self.fpn_channels = fpn_channels

    def get_config(self):
        """Returns the config, including the serialized image encoder."""
        config = super().get_config()
        config["fpn_channels"] = self.fpn_channels
        config["image_encoder"] = keras.layers.serialize(self.image_encoder)
        return config

    @classmethod
    def from_config(cls, config):
        """Creates a backbone from a config produced by `get_config`.

        Args:
            config: dict. The `"image_encoder"` entry may be either a
                serialized layer config or an already constructed layer.

        Returns:
            A `DifferentialBinarizationBackbone` instance.
        """
        # Work on a shallow copy so the caller's dict is left untouched and
        # can safely be reused for another `from_config` call.
        config = dict(config)
        image_encoder = config["image_encoder"]
        # Only deserialize when given a serialized config; an encoder that
        # has already been built is passed through unchanged.
        if isinstance(image_encoder, dict):
            config["image_encoder"] = keras.layers.deserialize(image_encoder)
        return cls(**config)


def diffbin_fpn_model(inputs, out_channels):
    """Builds the feature pyramid network that fuses the encoder features.

    Every pyramid level is first projected to `out_channels` with a 1x1
    convolution. Coarser levels are then upsampled and added into the next
    finer one (P5 -> P4 -> P3 -> P2). Each merged level is passed through a
    3x3 convolution with `out_channels // 4` filters, the P5/P4/P3 results
    are upsampled by factors 8/4/2, and all four maps are concatenated
    along the last axis.

    Args:
        inputs: mapping with the keys `"P2"`, `"P3"`, `"P4"` and `"P5"`
            holding the encoder feature tensors.
        out_channels: int. Number of filters of the 1x1 projections; the
            3x3 output convolutions use `out_channels // 4` filters.

    Returns:
        The concatenated feature tensor, ordered `[p5, p4, p3, p2]`.
    """
    # 1x1 lateral projections, from the finest (P2) to the coarsest (P5).
    laterals = {
        level: layers.Conv2D(
            out_channels,
            kernel_size=1,
            use_bias=False,
            name=f"neck_in{level}",
        )(inputs[f"P{level}"])
        for level in (2, 3, 4, 5)
    }

    # Top-down pathway: upsample the coarser map (UpSampling2D with its
    # default settings) and add it to the lateral projection one level finer.
    merged = {5: laterals[5]}
    for step, level in enumerate((4, 3, 2), start=1):
        add_layer = layers.Add(name=f"add{step}")
        upsampled = layers.UpSampling2D()(merged[level + 1])
        merged[level] = add_layer([upsampled, laterals[level]])

    # 3x3 output convolution per level, coarsest first.
    reduced = {
        level: layers.Conv2D(
            out_channels // 4,
            kernel_size=3,
            padding="same",
            use_bias=False,
            name=f"neck_p{level}",
        )(merged[level])
        for level in (5, 4, 3, 2)
    }

    # Upsample P5/P4/P3 by 8/4/2; the P2 map is used as is.
    pyramid = [
        layers.UpSampling2D((factor, factor))(reduced[level])
        for level, factor in ((5, 8), (4, 4), (3, 2))
    ]
    pyramid.append(reduced[2])

    return layers.Concatenate(axis=-1)(pyramid)
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from keras import ops

from keras_hub.src.models.differential_binarization.differential_binarization_backbone import (
DifferentialBinarizationBackbone,
)
from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import (
DifferentialBinarizationPreprocessor,
)
from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone
from keras_hub.src.tests.test_case import TestCase


class DifferentialBinarizationTest(TestCase):
    """Basic checks for `DifferentialBinarizationBackbone`."""

    def setUp(self):
        # All shapes below are derived from these two values so the fixture
        # cannot drift out of sync with the tensors actually fed to the model.
        self.batch_size = 2
        self.image_size = 224
        self.images = ops.ones(
            (self.batch_size, self.image_size, self.image_size, 3)
        )
        self.image_encoder = ResNetBackbone(
            input_conv_filters=[64],
            input_conv_kernel_sizes=[7],
            stackwise_num_filters=[64, 128, 256, 512],
            stackwise_num_blocks=[3, 4, 6, 3],
            stackwise_num_strides=[1, 2, 2, 2],
            block_type="bottleneck_block",
            image_shape=(self.image_size, self.image_size, 3),
        )
        # NOTE(review): not used by the backbone test below; kept so the
        # fixture still constructs the preprocessor as before.
        self.preprocessor = DifferentialBinarizationPreprocessor()
        self.init_kwargs = {
            "image_encoder": self.image_encoder,
        }

    def test_backbone_basics(self):
        # The expected spatial size is a quarter of the input size
        # (56 for a 224x224 input), with 256 output channels for the
        # default `fpn_channels`.
        feature_size = self.image_size // 4
        self.run_backbone_test(
            cls=DifferentialBinarizationBackbone,
            init_kwargs=self.init_kwargs,
            input_data=self.images,
            expected_output_shape=(
                self.batch_size,
                feature_size,
                feature_size,
                256,
            ),
            run_mixed_precision_check=False,
            run_quantization_check=False,
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
from keras_hub.src.models.differential_binarization.differential_binarization_backbone import (
DifferentialBinarizationBackbone,
)


@keras_hub_export("keras_hub.layers.DifferentialBinarizationImageConverter")
class DifferentialBinarizationImageConverter(ImageConverter):
    """Image converter tied to `DifferentialBinarizationBackbone`.

    Adds no behavior of its own on top of `ImageConverter`; it only sets
    `backbone_cls`.
    """

    # Backbone class this converter is associated with.
    backbone_cls = DifferentialBinarizationBackbone
Loading
Loading