diff --git a/docs/configuration_reference/behavior_version.rst b/docs/configuration_reference/behavior_version.rst
index 60c30d016..af8a270ac 100644
--- a/docs/configuration_reference/behavior_version.rst
+++ b/docs/configuration_reference/behavior_version.rst
@@ -22,6 +22,38 @@ and not listing legacy/deprecated parameters.
 Version History
 ---------------
 
+Behavior version 11 (2021-12-16)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Broadcasting dims no longer match in :class:`CombineLayer` and others.
+This was never needed; instead, RETURNN broadcasts automatically to non-existing dims.
+To fix this, do not add any broadcasting dims.
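+
+For example, consider a network where ``"b"`` carries a static dim of 1
+(a minimal sketch following the pattern of the updated tests; layer names are illustrative):
+
+.. code-block:: python
+
+    network = {
+      "a": {"class": "linear", "activation": "sigmoid", "n_out": 5, "from": "data"},  # [B,T,5]
+      "b": {"class": "linear", "activation": "sigmoid", "n_out": 1, "from": "data"},  # [B,T,1]
+      # before behavior version 11, the static dim 1 of "b" matched the dim 5 of "a"
+      "add": {"class": "combine", "kind": "add", "from": ["a", "b"]},
+    }
+
+Now drop the size-1 dim, e.g. via :class:`SqueezeLayer`, and RETURNN broadcasts automatically:
+
+.. code-block:: python
+
+    network = {
+      "a": {"class": "linear", "activation": "sigmoid", "n_out": 5, "from": "data"},  # [B,T,5]
+      "b": {"class": "linear", "activation": "sigmoid", "n_out": 1, "from": "data"},  # [B,T,1]
+      "b_squeeze": {"class": "squeeze", "from": "b", "axis": "f"},  # [B,T]
+      "add": {"class": "combine", "kind": "add", "from": ["a", "b_squeeze"]},  # [B,T,5]
+    }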
+
+See issue `#666 <https://github.com/rwth-i6/returnn/issues/666>`__.
+
 Behavior version 10 (2021-12-07)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/returnn/tf/layers/basic.py b/returnn/tf/layers/basic.py
index 7839a4730..d15274410 100644
--- a/returnn/tf/layers/basic.py
+++ b/returnn/tf/layers/basic.py
@@ -1282,7 +1282,10 @@ def _get_common_input_position_axes(cls, input_data, position_data, old_gather_a
     :return: (common_axes_input, common_axes_position, specific_input_axes, specific_position_axes),
       all counted with batch dim.
     """
-    is_equal_opts = dict(allow_same_spatial_dim=True, broadcast_matches=True)
+    from returnn.util import BehaviorVersion
+    is_equal_opts = dict(allow_same_spatial_dim=True)
+    if BehaviorVersion.get() < 11:
+      is_equal_opts["broadcast_matches"] = True
     all_dim_tags, tags_dict = Dim.get_all_dimension_tags([input_data, position_data], is_equal_opts=is_equal_opts)
     input_tags, pos_tags = tags_dict[input_data], tags_dict[position_data]
     specific_input_axes = [i for i, tag in enumerate(input_tags) if tag not in pos_tags and i != old_gather_axis]
@@ -3771,7 +3774,10 @@ def get_out_data_from_opts(cls, name, axis, dim=1, sources=(), **kwargs):
 
     data = data.copy_as_batch_major()
     axis = cls._get_axis(data=data, axis=axis)
-    new_dim = SpatialDim("%s_expand_dims" % name, dim)
+    new_dim = Dim(
+      kind=Dim.Types.Feature if isinstance(init_axis, str) and init_axis.lower() == "f" else Dim.Types.Spatial,
+      description="%s_expand_dims" % name,
+      dimension=dim)
     data = data.copy_template(name="%s_output" % name)
     data = data.copy_add_dim_by_tag(new_dim, unbroadcast=True, axis=axis)
     if isinstance(init_axis, str):
@@ -6420,9 +6426,12 @@ def _auto_var_axes(source1, source2, red1, red2):
     :return: var1 tags, var2 tags
     :rtype: (list[Dim], list[Dim])
     """
+    from returnn.util import BehaviorVersion
     is_equal_opts = dict(
       treat_feature_as_spatial=True, allow_same_spatial_dim=True,
-      broadcast_matches=True, undefined_matches=True, derived_matches=True)
+      undefined_matches=True, derived_matches=True)
+    if BehaviorVersion.get() < 11:
+      is_equal_opts["broadcast_matches"] = True
     all_dim_tags, tags_dict = Dim.get_all_dimension_tags([source1, source2], is_equal_opts=is_equal_opts)
     tags1, tags2 = tags_dict[source1], tags_dict[source2]
     var1 = [tag for i, tag in enumerate(tags1) if tag not in tags2 and i not in red1]
diff --git a/returnn/tf/util/data.py b/returnn/tf/util/data.py
index 869093ba9..74a1c08a4 100644
--- a/returnn/tf/util/data.py
+++ b/returnn/tf/util/data.py
@@ -5049,6 +5049,7 @@ def get_common_data(cls, sources, ignore_feature_dim=False, allow_broadcast_all_
       This is always a template, and a new copy.
     :rtype: Data|None
     """
+    from returnn.util import BehaviorVersion
     if not sources:
       return None
     assert sources
@@ -5067,8 +5068,10 @@ def get_common_data(cls, sources, ignore_feature_dim=False, allow_broadcast_all_
     common.beam = SearchBeam.get_combined_beam(*[s.beam for s in sources])
     is_equal_opts = dict(
       ignore_feature_dim=ignore_feature_dim, treat_feature_as_spatial=True,
-      allow_same_spatial_dim=True, broadcast_matches=True,
+      allow_same_spatial_dim=True,
       undefined_matches=True, derived_matches=True)
+    if BehaviorVersion.get() < 11:
+      is_equal_opts["broadcast_matches"] = True
     all_dim_tags, tags_dict = Dim.get_all_dimension_tags(sources, is_equal_opts=is_equal_opts)
     # Check for potential undefined tags, and replace those with defined tags if possible.
     for axis, dim_tag in enumerate(common.dim_tags):
diff --git a/returnn/util/basic.py b/returnn/util/basic.py
index e7270d1fd..c09e41f42 100644
--- a/returnn/util/basic.py
+++ b/returnn/util/basic.py
@@ -209,7 +209,7 @@ class BehaviorVersion:
   The version will be set after the config is defined at __main__.init_config() or Engine.__init__()
   """
 
-  _latest_behavior_version = 10
+  _latest_behavior_version = 11
   _behavior_version = None  # type: typing.Optional[int]
 
   @classmethod
diff --git a/tests/test_TFNetworkLayer.py b/tests/test_TFNetworkLayer.py
index 7c5e6709a..a03c68b81 100644
--- a/tests/test_TFNetworkLayer.py
+++ b/tests/test_TFNetworkLayer.py
@@ -920,7 +920,8 @@ def test_CombineLayer_broadcast():
     net_dict = {
       "lin1": {"class": "linear", "activation": "sigmoid", "n_out": 5, "from": "data:data"},
       "lin2": {"class": "linear", "activation": "sigmoid", "n_out": 1, "from": "data:data"},
-      "combine": {"class": "combine", "kind": "add", "from": ["lin1", "lin2"]},
+      "lin2_squeeze": {"class": "squeeze", "from": "lin2", "axis": "f"},
+      "combine": {"class": "combine", "kind": "add", "from": ["lin1", "lin2_squeeze"]},
       "output": {"class": "softmax", "loss": "ce", "from": "combine"}
     }
     config = Config({"debug_print_layer_output_template": True})
@@ -939,7 +940,7 @@ def test_CombineLayer_broadcast_multiple():
   with make_scope() as session:
     net_dict = {
       "p1": {"class": "variable", "shape": (5, 5, 3), "add_batch_axis": False},
-      "p2": {"class": "variable", "shape": (5, 1, 1), "add_batch_axis": False},
+      "p2": {"class": "variable", "shape": (5,), "add_batch_axis": False},
       "combine": {"class": "combine", "kind": "add", "from": ["p1", "p2"]},
       "output": {"class": "softmax", "loss": "ce", "from": "combine"}
     }
@@ -1275,7 +1276,7 @@ def test_CombineLayer_time_broadcast():
     config = Config({
       "debug_print_layer_output_template": True,
       "extern_data": {
-        "in1": {"shape": (n_features, 1), "batch_dim_axis": None, "time_dim_axis": None, "feature_dim_axis": 0},
+        "in1": {"shape": (n_features,), "batch_dim_axis": None, "time_dim_axis": None, "feature_dim_axis": 0},
         "in2": {"shape": (n_features, None), "batch_dim_axis": 0, "time_dim_axis": 2}
       }
     })
@@ -1299,7 +1300,7 @@ def test_CombineLayer_time_broadcast_swapped():
       "debug_print_layer_output_template": True,
       "extern_data": {
         "in1": {"shape": (n_features, None), "batch_dim_axis": 0, "time_dim_axis": 2},
-        "in2": {"shape": (n_features, 1), "batch_dim_axis": None, "time_dim_axis": None, "feature_dim_axis": 0},
+        "in2": {"shape": (n_features,), "batch_dim_axis": None, "time_dim_axis": None, "feature_dim_axis": 0},
       }
     })
     network = TFNetwork(config=config, train_flag=True)
@@ -3399,6 +3400,26 @@ def test_GatherLayer_search_beam():
       "initial_output": 0}}}})
 
 
+def test_GatherLayer_broadcast_dim():
+  from returnn.tf.util.data import batch_dim
+  head_dim = SpatialDim("head", 1)  # previously, this dim would match all others and therefore fail.
+  round_dim = SpatialDim("round", 2)
+  chunk_dim = SpatialDim("chunk")
+  time_dim = SpatialDim("time")
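+  # Gather from "source" [B,head,time] at int32 positions [B,head,round,chunk] over the time axis;
+  # output is [B,head,round,chunk]. The static head dim of size 1 must now match exactly (no broadcast matching).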
+  config = Config({"extern_data": {
+    "source": {"dim_tags": [batch_dim, head_dim, time_dim]},
+    "position": {"dim_tags": [batch_dim, head_dim, round_dim, chunk_dim], "dtype": "int32"}},
+    "debug_print_layer_output_template": True})
+  net = TFNetwork(config=config)
+  net.construct_from_dict({
+    "output": {
+      'class': 'gather', 'from': 'data:source', 'position': 'data:position', 'axis': time_dim,
+      'out_shape': {batch_dim, head_dim, round_dim, chunk_dim}}
+  })
+
+
 def test_SliceNdLayer():
   n_batch = 5
   n_time = 7
diff --git a/tests/test_TFUtil.py b/tests/test_TFUtil.py
index 18e4408ab..aa47543a4 100644
--- a/tests/test_TFUtil.py
+++ b/tests/test_TFUtil.py
@@ -608,7 +608,7 @@ def test_Data_get_common_data_extra_static_spatial():
 
 def test_Data_get_common_data_broadcast_multiple():
   d1 = Data(name='d_orig', shape=(5, 5, 3), dtype='float32', batch_dim_axis=None)
-  d2 = Data(name='d_bc', shape=(5, 1, 1), dtype='float32', batch_dim_axis=None)
+  d2 = Data(name='d_bc', shape=(5,), dtype='float32', batch_dim_axis=None)
   common = Data.get_common_data([d1, d2])
   assert d1.shape == common.shape