From b1a5427771dca159ac4a4e8fa054b076bb1d90d8 Mon Sep 17 00:00:00 2001 From: innat Date: Sat, 2 Mar 2024 23:51:46 +0600 Subject: [PATCH] run formatters" --- keras_cv/models/backbones/backbone_presets.py | 2 +- .../video_swin/video_swin_aliases.py | 5 ++- .../video_swin/video_swin_backbone.py | 34 +++++++++++-------- .../video_classifier_presets.py | 22 +++++------- 4 files changed, 32 insertions(+), 31 deletions(-) diff --git a/keras_cv/models/backbones/backbone_presets.py b/keras_cv/models/backbones/backbone_presets.py index 93d9595c6f..b77163aa8f 100644 --- a/keras_cv/models/backbones/backbone_presets.py +++ b/keras_cv/models/backbones/backbone_presets.py @@ -28,9 +28,9 @@ from keras_cv.models.backbones.mobilenet_v3 import mobilenet_v3_backbone_presets from keras_cv.models.backbones.resnet_v1 import resnet_v1_backbone_presets from keras_cv.models.backbones.resnet_v2 import resnet_v2_backbone_presets +from keras_cv.models.backbones.video_swin import video_swin_backbone_presets from keras_cv.models.backbones.vit_det import vit_det_backbone_presets from keras_cv.models.object_detection.yolo_v8 import yolo_v8_backbone_presets -from keras_cv.models.backbones.video_swin import video_swin_backbone_presets backbone_presets_no_weights = { **resnet_v1_backbone_presets.backbone_presets_no_weights, diff --git a/keras_cv/models/backbones/video_swin/video_swin_aliases.py b/keras_cv/models/backbones/video_swin/video_swin_aliases.py index ca31cac84e..56db9ca743 100644 --- a/keras_cv/models/backbones/video_swin/video_swin_aliases.py +++ b/keras_cv/models/backbones/video_swin/video_swin_aliases.py @@ -13,11 +13,14 @@ # limitations under the License. 
import copy + from keras_cv.models.backbones.video_swin.video_swin_backbone import ( VideoSwinBackbone, ) +from keras_cv.models.backbones.video_swin.video_swin_backbone_presets import ( + backbone_presets, +) from keras_cv.utils.python_utils import classproperty -from keras_cv.models.backbones.video_swin.video_swin_backbone_presets import backbone_presets ALIAS_DOCSTRING = """VideoSwin{size}Backbone model. diff --git a/keras_cv/models/backbones/video_swin/video_swin_backbone.py b/keras_cv/models/backbones/video_swin/video_swin_backbone.py index baf44161cc..a6c0868699 100644 --- a/keras_cv/models/backbones/video_swin/video_swin_backbone.py +++ b/keras_cv/models/backbones/video_swin/video_swin_backbone.py @@ -16,10 +16,10 @@ import numpy as np from keras import layers -from keras_cv.backend import ops from keras_cv.api_export import keras_cv_export from keras_cv.backend import keras +from keras_cv.backend import ops from keras_cv.layers.video_swin_layers import VideoSwinBasicLayer from keras_cv.layers.video_swin_layers import VideoSwinPatchingAndEmbedding from keras_cv.layers.video_swin_layers import VideoSwinPatchMerging @@ -33,7 +33,7 @@ class VideoSwinBackbone(Backbone): Args: input_shape (tuple[int], optional): The size of the input image in - `(depth, height, width, channel)` format. + `(depth, height, width, channel)` format. Defaults to `(32, 224, 224, 3)`. input_tensor (KerasTensor, optional): Output of `keras.layers.Input()`) to use as image input for the model. @@ -51,18 +51,18 @@ class VideoSwinBackbone(Backbone): num_heads (tuple[int]): Number of attention head of each stage. Default to [3, 6, 12, 24] window_size (int): Window size. Default to [8, 7, 7]. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default to 4. - qkv_bias (bool): If True, add a learnable bias to query, key, value. + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default to True. 
qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default to None. drop_rate (float): Dropout rate. attn_drop_rate (float): Attention dropout rate. Default: 0. drop_path_rate (float): Stochastic depth rate. Default: 0.2. - patch_norm (bool): If True, add normalization after patch embedding. + patch_norm (bool): If True, add normalization after patch embedding. Default to False. - + References: - [Video Swin Transformer](https://arxiv.org/abs/2106.13230) - [Official Code](https://github.com/SwinTransformer/Video-Swin-Transformer) @@ -94,7 +94,11 @@ def __init__( ) # Check that the input video is well specified. - if input_spec.shape[-4] is None or input_spec.shape[-3] is None or input_spec.shape[-2] is None: + if ( + input_spec.shape[-4] is None + or input_spec.shape[-3] is None + or input_spec.shape[-2] is None + ): raise ValueError( "Depth, height and width of the video must be specified" " in `input_shape`." @@ -112,11 +116,11 @@ def __init__( # Use common rescaling strategy across keras_cv x = keras.layers.Rescaling(1.0 / 255.0)(x) - # Video Swin scales inputs based on the standard ImageNet mean/stddev. - # Officially, Videw Swin takes tensor of [0-255] ranges. - # And use mean=[123.675, 116.28, 103.53] and - # std=[58.395, 57.12, 57.375] for normalization. - # So, if include_rescaling is set to True, then, to match with the + # VideoSwin scales inputs based on the ImageNet mean/stddev. + # Officially, Video Swin takes tensor of [0-255] ranges. + # And use mean=[123.675, 116.28, 103.53] and + # std=[58.395, 57.12, 57.375] for normalization. + # So, if include_rescaling is set to True, then, to match with the # official scores, following normalization should be added. 
x = (x - ops.array([0.485, 0.456, 0.406], dtype=x.dtype)) / ( ops.array([0.229, 0.224, 0.225], dtype=x.dtype) @@ -147,9 +151,9 @@ def __init__( attn_drop_rate=attn_drop_rate, drop_path_rate=dpr[sum(depths[:i]) : sum(depths[: i + 1])], norm_layer=norm_layer, - downsample=VideoSwinPatchMerging - if (i < num_layers - 1) - else None, + downsample=( + VideoSwinPatchMerging if (i < num_layers - 1) else None + ), name=f"videoswin_basic_layer_{i + 1}", ) x = layer(x) diff --git a/keras_cv/models/classification/video_classifier_presets.py b/keras_cv/models/classification/video_classifier_presets.py index 2a8447bd20..384373c1f9 100644 --- a/keras_cv/models/classification/video_classifier_presets.py +++ b/keras_cv/models/classification/video_classifier_presets.py @@ -16,9 +16,7 @@ classifier_presets = { "videoswin_tiny_kinetics_classifier": { "metadata": { - "description": ( - "videoswin_tiny_kinetics " # TODO: update - ), + "description": ("videoswin_tiny_kinetics "), # TODO: update "params": 25_613_800, "official_name": "VideoClassifier", "path": "video_classifier", @@ -26,20 +24,16 @@ }, "videoswin_small_kinetics_classifier": { "metadata": { - "description": ( - "videoswin_small_kinetics " # TODO: update - ), - "params": 25_613_800, # TODO: update + "description": ("videoswin_small_kinetics "), # TODO: update + "params": 25_613_800, # TODO: update "official_name": "VideoClassifier", "path": "video_classifier", }, }, "videoswin_base_kinetics_classifier": { "metadata": { - "description": ( - "videoswin_base_kinetics " # TODO: update - ), - "params": 25_613_800, # TODO: update + "description": ("videoswin_base_kinetics "), # TODO: update + "params": 25_613_800, # TODO: update "official_name": "VideoClassifier", "path": "video_classifier", }, @@ -47,11 +41,11 @@ "videoswin_base_something_something_v2_classifier": { "metadata": { "description": ( - "videoswin_base_something_something_v2 " # TODO: update + "videoswin_base_something_something_v2 " # TODO: update ), - "params": 
25_613_800, # TODO: update + "params": 25_613_800, # TODO: update "official_name": "VideoClassifier", "path": "video_classifier", }, }, -} \ No newline at end of file +}