From 145c0ea5bfb9647d95625b3612f5da65ac312889 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Fri, 11 Oct 2024 08:37:57 +0200 Subject: [PATCH 01/17] Added tests for the `_read_and_write_sets()`. --- tests/sdfg/state_test.py | 90 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py index 7ba43ac4c0..1fac1d56f4 100644 --- a/tests/sdfg/state_test.py +++ b/tests/sdfg/state_test.py @@ -1,5 +1,6 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import dace +from dace import subsets as sbs from dace.transformation.helpers import find_sdfg_control_flow @@ -43,6 +44,7 @@ def test_read_write_set_y_formation(): assert 'B' not in state.read_and_write_sets()[0] + def test_deepcopy_state(): N = dace.symbol('N') @@ -58,6 +60,92 @@ def double_loop(arr: dace.float32[N]): sdfg.validate() +def test_read_and_write_set_filter(): + sdfg = dace.SDFG('graph') + state = sdfg.add_state('state') + sdfg.add_array('A', [2, 2], dace.float64) + sdfg.add_scalar('B', dace.float64) + sdfg.add_array('C', [2, 2], dace.float64) + A, B, C = (state.add_access(name) for name in ('A', 'B', 'C')) + + state.add_nedge( + A, + B, + dace.Memlet("B[0] -> 0, 0"), + ) + state.add_nedge( + B, + C, + # If the Memlet would be `B[0] -> 1, 1` it would then be filtered out. + # This is an intentional behaviour for compatibility. + dace.Memlet("C[1, 1] -> 0"), + ) + state.add_nedge( + B, + C, + dace.Memlet("B[0] -> 0, 0"), + ) + sdfg.validate() + + expected_reads = { + "A": [sbs.Range.from_string("0, 0")], + # See comment in `state._read_and_write_sets()` why "B" is here + # it should actually not, but it is a bug. + "B": [sbs.Range.from_string("0")], + } + expected_writes = { + # However, this should always be here. + "B": [sbs.Range.from_string("0")], + "C": [sbs.Range.from_string("0, 0"), sbs.Range.from_string("1, 1")], + } + read_set, write_set = state._read_and_write_sets() + + for expected_sets, computed_sets in [(expected_reads, read_set), (expected_writes, write_set)]: + assert expected_sets.keys() == computed_sets.keys(), f"Expected the set to contain '{expected_sets.keys()}' but got '{computed_sets.keys()}'." + for access_data in expected_sets.keys(): + for exp in expected_sets[access_data]: + found_match = False + for res in computed_sets[access_data]: + if res == exp: + found_match = True + break + assert found_match, f"Could not find the subset '{exp}' only got '{computed_sets}'" + + +def test_read_and_write_set_selection(): + sdfg = dace.SDFG('graph') + state = sdfg.add_state('state') + sdfg.add_array('A', [2, 2], dace.float64) + sdfg.add_scalar('B', dace.float64) + A, B = (state.add_access(name) for name in ('A', 'B')) + + state.add_nedge( + A, + B, + dace.Memlet("A[0, 0]"), + ) + sdfg.validate() + + expected_reads = { + "A": [sbs.Range.from_string("0, 0")], + } + expected_writes = { + "B": [sbs.Range.from_string("0")], + } + read_set, write_set = state._read_and_write_sets() + + for expected_sets, computed_sets in [(expected_reads, read_set), (expected_writes, write_set)]: + assert expected_sets.keys() == computed_sets.keys(), f"Expected the set to contain '{expected_sets.keys()}' but got '{computed_sets.keys()}'." + for access_data in expected_sets.keys(): + for exp in expected_sets[access_data]: + found_match = False + for res in computed_sets[access_data]: + if res == exp: + found_match = True + break + assert found_match, f"Could not find the subset '{exp}' only got '{computed_sets}'" + + def test_add_mapped_tasklet(): sdfg = dace.SDFG("test_add_mapped_tasklet") state = sdfg.add_state(is_start_block=True) @@ -82,6 +170,8 @@ def test_add_mapped_tasklet(): if __name__ == '__main__': + test_read_and_write_set_selection() + test_read_and_write_set_filter() test_read_write_set() test_read_write_set_y_formation() test_deepcopy_state() From 38e748bfb2576103c0cf490fcb3f77a9c20361d9 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Fri, 11 Oct 2024 08:49:42 +0200 Subject: [PATCH 02/17] Added the fix from my MapFusion PR. --- dace/sdfg/state.py | 111 +++++++++++++++++++++++++++++++-------------- 1 file changed, 78 insertions(+), 33 deletions(-) diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index 8d443e6beb..a9c99da7f9 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -745,51 +745,96 @@ def update_if_not_none(dic, update): return defined_syms + def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr, List[Subset]]]: """ Determines what data is read and written in this subgraph, returning dictionaries from data containers to all subsets that are read/written. """ + from dace.sdfg import utils # Avoid cyclic import + + # Ensures that the `{src,dst}_subset` are properly set. + # TODO: find where the problems are + for edge in self.edges(): + edge.data.try_initialize(self.sdfg, self, edge) + read_set = collections.defaultdict(list) write_set = collections.defaultdict(list) - from dace.sdfg import utils # Avoid cyclic import - subgraphs = utils.concurrent_subgraphs(self) - for sg in subgraphs: - rs = collections.defaultdict(list) - ws = collections.defaultdict(list) + + for subgraph in utils.concurrent_subgraphs(self): + subgraph_read_set = collections.defaultdict(list) # read and write set of this subgraph. + subgraph_write_set = collections.defaultdict(list) # Traverse in topological order, so data that is written before it # is read is not counted in the read set - for n in utils.dfs_topological_sort(sg, sources=sg.source_nodes()): - if isinstance(n, nd.AccessNode): - in_edges = sg.in_edges(n) - out_edges = sg.out_edges(n) - # Filter out memlets which go out but the same data is written to the AccessNode by another memlet - for out_edge in list(out_edges): - for in_edge in list(in_edges): - if (in_edge.data.data == out_edge.data.data - and in_edge.data.dst_subset.covers(out_edge.data.src_subset)): - out_edges.remove(out_edge) - break - - for e in in_edges: - # skip empty memlets - if e.data.is_empty(): - continue - # Store all subsets that have been written - ws[n.data].append(e.data.subset) - for e in out_edges: - # skip empty memlets - if e.data.is_empty(): + # TODO: This only works if every data descriptor is only once in a path. + for n in utils.dfs_topological_sort(subgraph, sources=subgraph.source_nodes()): + if not isinstance(n, nd.AccessNode): + # Read and writes can only be done through access nodes, + # so ignore every other node. + continue + + # Get a list of all incoming (writes) and outgoing (reads) edges of the + # access node, ignore all empty memlets as they do not carry any data. + in_edges = [in_edge for in_edge in subgraph.in_edges(n) if not in_edge.data.is_empty()] + out_edges = [out_edge for out_edge in subgraph.out_edges(n) if not out_edge.data.is_empty()] + + # Extract the subsets that describes where we read and write the data + # and store them for the later filtering. + # NOTE: In certain cases the corresponding subset might be None, in this case + # we assume that the whole array is written, which is the default behaviour. + ac_desc = n.desc(self.sdfg) + ac_size = ac_desc.total_size + in_subsets = dict() + for in_edge in in_edges: + # Ensure that if the destination subset is not given, our assumption, that the + # whole array is written to, is valid, by testing if the memlet transfers the + # whole array. + assert (in_edge.data.dst_subset is not None) or (in_edge.data.num_elements() == ac_size) + in_subsets[in_edge] = ( + sbs.Range.from_array(ac_desc) + if in_edge.data.dst_subset is None + else in_edge.data.dst_subset + ) + out_subsets = dict() + for out_edge in out_edges: + assert (out_edge.data.src_subset is not None) or (out_edge.data.num_elements() == ac_size) + out_subsets[out_edge] = ( + sbs.Range.from_array(ac_desc) + if out_edge.data.src_subset is None + else out_edge.data.src_subset + ) + + # If a memlet reads a particular region of data from the access node and there + # exists a memlet at the same access node that writes to the same region, then + # this read is ignored, and not included in the final read set, but only + # accounted fro in the write set. See also note below. + # TODO: Handle the case when multiple disjoint writes are needed to cover the read. + for out_edge in list(out_edges): + for in_edge in in_edges: + if out_edge.data.data != in_edge.data.data: + # NOTE: This check does not make any sense, and is in my (@philip-paul-mueller) + # view wrong. As it will filter out some accesses but not all, which one solely + # depends on how the memelts were created, i.e. to which container their `data` + # attribute is associated to. See also [issue #1643](https://github.com/spcl/dace/issues/1643). continue - rs[n.data].append(e.data.subset) - # Union all subgraphs, so an array that was excluded from the read - # set because it was written first is still included if it is read - # in another subgraph - for data, accesses in rs.items(): + if in_subsets[in_edge].covers(out_subsets[out_edge]): + out_edges.remove(out_edge) + break + + # Update the read and write sets of the subgraph. + if in_edges: + subgraph_write_set[n.data].extend(in_subsets.values()) + if out_edges: + subgraph_read_set[n.data].extend(out_subsets[out_edge] for out_edge in out_edges) + + # Add the subgraph's read and write set to the final ones. + for data, accesses in subgraph_read_set.items(): read_set[data] += accesses - for data, accesses in ws.items(): + for data, accesses in subgraph_write_set.items(): write_set[data] += accesses - return read_set, write_set + + return copy.deepcopy((read_set, write_set)) + def read_and_write_sets(self) -> Tuple[Set[AnyStr], Set[AnyStr]]: """ From 3748c03cd863c3f5241ab5e0e03164d3c6c61319 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Fri, 11 Oct 2024 08:55:51 +0200 Subject: [PATCH 03/17] Now made `read_and_write_sets()` fully adhere to their own definition. --- dace/sdfg/state.py | 6 ------ tests/sdfg/state_test.py | 6 ------ 2 files changed, 12 deletions(-) diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index a9c99da7f9..ba853d088d 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -811,12 +811,6 @@ def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr, # TODO: Handle the case when multiple disjoint writes are needed to cover the read. for out_edge in list(out_edges): for in_edge in in_edges: - if out_edge.data.data != in_edge.data.data: - # NOTE: This check does not make any sense, and is in my (@philip-paul-mueller) - # view wrong. As it will filter out some accesses but not all, which one solely - # depends on how the memelts were created, i.e. to which container their `data` - # attribute is associated to. See also [issue #1643](https://github.com/spcl/dace/issues/1643). - continue if in_subsets[in_edge].covers(out_subsets[out_edge]): out_edges.remove(out_edge) break diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py index 1fac1d56f4..7d74ae6bcc 100644 --- a/tests/sdfg/state_test.py +++ b/tests/sdfg/state_test.py @@ -76,8 +76,6 @@ def test_read_and_write_set_filter(): state.add_nedge( B, C, - # If the Memlet would be `B[0] -> 1, 1` it would then be filtered out. - # This is an intentional behaviour for compatibility. dace.Memlet("C[1, 1] -> 0"), ) state.add_nedge( @@ -89,12 +87,8 @@ def test_read_and_write_set_filter(): expected_reads = { "A": [sbs.Range.from_string("0, 0")], - # See comment in `state._read_and_write_sets()` why "B" is here - # it should actually not, but it is a bug. - "B": [sbs.Range.from_string("0")], } expected_writes = { - # However, this should always be here. "B": [sbs.Range.from_string("0")], "C": [sbs.Range.from_string("0, 0"), sbs.Range.from_string("1, 1")], } From 3ab4bf3e379ea96fbb7635904cc8b320215fc558 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Fri, 11 Oct 2024 11:27:47 +0200 Subject: [PATCH 04/17] Updated a test for the `PruneConnectors` transformation. Before in this case the transformation was not able to be applied. The reason was because of the behaviour of the `SDFGState.read_and_write_sets()` function. However, now with the fix of the [PR#1678](https://github.com/spcl/dace/pull/1678) the transformation became applicable. --- tests/transformations/prune_connectors_test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/transformations/prune_connectors_test.py b/tests/transformations/prune_connectors_test.py index 4026ec3e1c..8ec0d3615a 100644 --- a/tests/transformations/prune_connectors_test.py +++ b/tests/transformations/prune_connectors_test.py @@ -153,7 +153,6 @@ def _make_read_write_sdfg( Depending on `conforming_memlet` the memlet that copies `inner_A` into `inner_B` will either be associated to `inner_A` (`True`) or `inner_B` (`False`). - This choice has consequences on if the transformation can apply or not. Notes: This is most likely a bug, see [issue#1643](https://github.com/spcl/dace/issues/1643), @@ -421,7 +420,6 @@ def test_prune_connectors_with_dependencies(): def test_read_write_1(): - # Because the memlet is conforming, we can apply the transformation. sdfg, nsdfg = _make_read_write_sdfg(True) assert PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False) @@ -429,10 +427,13 @@ def test_read_write_1(): def test_read_write_2(): - # Because the memlet is not conforming, we can not apply the transformation. + # In previous versions of DaCe the transformation could not be applied in the + # case of a not conforming Memlet. + # See [PR#1678](https://github.com/spcl/dace/pull/1678) sdfg, nsdfg = _make_read_write_sdfg(False) - assert not PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False) + assert PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False) + sdfg.apply_transformations_repeated(PruneConnectors, validate=True, validate_all=True) if __name__ == "__main__": From b4feddfe8459dcb49e46ef416512e022624d2e60 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Fri, 11 Oct 2024 13:18:49 +0200 Subject: [PATCH 05/17] Added code to `test_more_than_a_map` to ensure that the transformation does not change the behaviour of teh output. --- .../move_loop_into_map_test.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/transformations/move_loop_into_map_test.py b/tests/transformations/move_loop_into_map_test.py index dca775bb7a..de610d0ca8 100644 --- a/tests/transformations/move_loop_into_map_test.py +++ b/tests/transformations/move_loop_into_map_test.py @@ -2,6 +2,7 @@ import dace from dace.transformation.interstate import MoveLoopIntoMap import unittest +import copy import numpy as np I = dace.symbol("I") @@ -170,7 +171,26 @@ def test_more_than_a_map(self): body.add_nedge(aread, oread, dace.Memlet.from_array('A', aarr)) body.add_nedge(twrite, owrite, dace.Memlet.from_array('out', oarr)) sdfg.add_loop(None, body, None, '_', '0', '_ < 10', '_ + 1') - count = sdfg.apply_transformations(MoveLoopIntoMap) + + sdfg_args_ref = { + "A": np.array(np.random.rand(3, 3), dtype=np.float64), + "B": np.array(np.random.rand(3, 3), dtype=np.float64), + "out": np.array(np.random.rand(3, 3), dtype=np.float64), + } + sdfg_args_res = copy.deepcopy(sdfg_args_ref) + + # Perform the reference execution + sdfg(**sdfg_args_ref) + + # Apply the transformation and execute the SDFG again. + count = sdfg.apply_transformations(MoveLoopIntoMap, validate_all=True, validate=True) + sdfg(**sdfg_args_res) + + for name in sdfg_args_ref.keys(): + self.assertTrue( + np.allclose(sdfg_args_ref[name], sdfg_args_res[name]), + f"Miss match for {name}", + ) self.assertFalse(count > 0) def test_more_than_a_map_1(self): From 70fa3db43020073293dca5bdf9d2bf6649c93814 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Mon, 14 Oct 2024 09:29:58 +0200 Subject: [PATCH 06/17] Added the new memlet creation syntax. --- tests/sdfg/state_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py index 7d74ae6bcc..33e02088a4 100644 --- a/tests/sdfg/state_test.py +++ b/tests/sdfg/state_test.py @@ -71,17 +71,17 @@ def test_read_and_write_set_filter(): state.add_nedge( A, B, - dace.Memlet("B[0] -> 0, 0"), + dace.Memlet("B[0] -> [0, 0]"), ) state.add_nedge( B, C, - dace.Memlet("C[1, 1] -> 0"), + dace.Memlet("C[1, 1] -> [0]"), ) state.add_nedge( B, C, - dace.Memlet("B[0] -> 0, 0"), + dace.Memlet("B[0] -> [0, 0]"), ) sdfg.validate() From b187a8260ba197280b7b1c3cd8075a6a0f47a810 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Mon, 14 Oct 2024 13:15:13 +0200 Subject: [PATCH 07/17] Modified some comments to make them clearer. --- dace/sdfg/state.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index 855cf83f62..53f8d98491 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -766,7 +766,8 @@ def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr, subgraph_write_set = collections.defaultdict(list) # Traverse in topological order, so data that is written before it # is read is not counted in the read set - # TODO: This only works if every data descriptor is only once in a path. + # NOTE: Each AccessNode is processed individually. Thus, if an array appears multiple + # times in a path, the individual results are combined, without further processing. for n in utils.dfs_topological_sort(subgraph, sources=subgraph.source_nodes()): if not isinstance(n, nd.AccessNode): # Read and writes can only be done through access nodes, @@ -808,7 +809,8 @@ def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr, # exists a memlet at the same access node that writes to the same region, then # this read is ignored, and not included in the final read set, but only # accounted fro in the write set. See also note below. - # TODO: Handle the case when multiple disjoint writes are needed to cover the read. + # TODO: Handle the case when multiple disjoint writes would be needed to cover the + # read. E.g. edges write `0:10` and `10:20` but the read happens at `5:15`. for out_edge in list(out_edges): for in_edge in in_edges: if in_subsets[in_edge].covers(out_subsets[out_edge]): From 9c6cb6c91cd924cb9269c727192778106fe36a17 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Mon, 14 Oct 2024 13:15:48 +0200 Subject: [PATCH 08/17] Modified the `tests/transformations/move_loop_into_map_test.py::test_more_than_a_map()` test. The test now assumes that it can be applied, as it was discussed on github this should be the correct behaviour. Furthermore, there is also a test which ensures that the same values are computed. This commit also adds an additional test (``tests/transformations/move_loop_into_map_test.py::test_more_than_a_map_4()`) which is essentially the same. However, in one of its Memlet it is a bit different and therefore the transformation can not be applied. --- .../move_loop_into_map_test.py | 58 ++++++++++++++++++- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/tests/transformations/move_loop_into_map_test.py b/tests/transformations/move_loop_into_map_test.py index de610d0ca8..fbb05d30f5 100644 --- a/tests/transformations/move_loop_into_map_test.py +++ b/tests/transformations/move_loop_into_map_test.py @@ -148,7 +148,10 @@ def test_apply_multiple_times_1(self): self.assertTrue(np.allclose(val, ref)) def test_more_than_a_map(self): - """ `out` is read and written indirectly by the MapExit, potentially leading to a RW dependency. """ + """ + `out` is read and written indirectly by the MapExit, potentially leading to a RW dependency. + However, there is no dependency. + """ sdfg = dace.SDFG('more_than_a_map') _, aarr = sdfg.add_array('A', (3, 3), dace.float64) _, barr = sdfg.add_array('B', (3, 3), dace.float64) @@ -168,7 +171,7 @@ def test_more_than_a_map(self): external_edges=True, input_nodes=dict(out=oread, B=bread), output_nodes=dict(tmp=twrite)) - body.add_nedge(aread, oread, dace.Memlet.from_array('A', aarr)) + body.add_nedge(aread, oread, dace.Memlet.from_array('A', oarr)) body.add_nedge(twrite, owrite, dace.Memlet.from_array('out', oarr)) sdfg.add_loop(None, body, None, '_', '0', '_ < 10', '_ + 1') @@ -191,7 +194,7 @@ def test_more_than_a_map(self): np.allclose(sdfg_args_ref[name], sdfg_args_res[name]), f"Miss match for {name}", ) - self.assertFalse(count > 0) + self.assertTrue(count > 0) def test_more_than_a_map_1(self): """ @@ -289,6 +292,55 @@ def test_more_than_a_map_3(self): count = sdfg.apply_transformations(MoveLoopIntoMap) self.assertFalse(count > 0) + def test_more_than_a_map_4(self): + """ + The test is very similar to `test_more_than_a_map()`. But a memlet is different + which leads to a RW dependency, which blocks the transformation. + """ + sdfg = dace.SDFG('more_than_a_map') + _, aarr = sdfg.add_array('A', (3, 3), dace.float64) + _, barr = sdfg.add_array('B', (3, 3), dace.float64) + _, oarr = sdfg.add_array('out', (3, 3), dace.float64) + _, tarr = sdfg.add_array('tmp', (3, 3), dace.float64, transient=True) + body = sdfg.add_state('map_state') + aread = body.add_access('A') + oread = body.add_access('out') + bread = body.add_access('B') + twrite = body.add_access('tmp') + owrite = body.add_access('out') + body.add_mapped_tasklet('op', + dict(i='0:3', j='0:3'), + dict(__in1=dace.Memlet('out[i, j]'), __in2=dace.Memlet('B[i, j]')), + '__out = __in1 - __in2', + dict(__out=dace.Memlet('tmp[i, j]')), + external_edges=True, + input_nodes=dict(out=oread, B=bread), + output_nodes=dict(tmp=twrite)) + body.add_nedge(aread, oread, dace.Memlet('A[Mod(_, 3), 0:3] -> [Mod(_ + 1, 3), 0:3]', aarr)) + body.add_nedge(twrite, owrite, dace.Memlet.from_array('out', oarr)) + sdfg.add_loop(None, body, None, '_', '0', '_ < 10', '_ + 1') + + sdfg_args_ref = { + "A": np.array(np.random.rand(3, 3), dtype=np.float64), + "B": np.array(np.random.rand(3, 3), dtype=np.float64), + "out": np.array(np.random.rand(3, 3), dtype=np.float64), + } + sdfg_args_res = copy.deepcopy(sdfg_args_ref) + + # Perform the reference execution + sdfg(**sdfg_args_ref) + + # Apply the transformation and execute the SDFG again. + count = sdfg.apply_transformations(MoveLoopIntoMap, validate_all=True, validate=True) + sdfg(**sdfg_args_res) + + for name in sdfg_args_ref.keys(): + self.assertTrue( + np.allclose(sdfg_args_ref[name], sdfg_args_res[name]), + f"Miss match for {name}", + ) + self.assertFalse(count > 0) + if __name__ == '__main__': unittest.main() From b7fe24219455e980b3c9f1ca6de06291f9dc986a Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Tue, 22 Oct 2024 14:43:50 +0200 Subject: [PATCH 09/17] Added a test to highlights the error. Currently we only see the error in `tests/numpy/ufunc_support_test.py::test_ufunc_add_accumulate_simple` if the auto optimizations are enabled. --- .../refine_nested_access_test.py | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/tests/transformations/refine_nested_access_test.py b/tests/transformations/refine_nested_access_test.py index d9fb9a7392..94c02a88de 100644 --- a/tests/transformations/refine_nested_access_test.py +++ b/tests/transformations/refine_nested_access_test.py @@ -156,7 +156,108 @@ def inner_sdfg(A: dace.int32[5], B: dace.int32[5, 5], idx_a: int, idx_b: int): assert np.allclose(ref, val) +def _make_rna_read_and_write_set_sdfg(diff_in_out: bool) -> dace.SDFG: + """Generates the SDFG for the `test_rna_read_and_write_sets_*()` tests. + + If `diff_in_out` is `False` then the output is also used as temporary storage + within the nested SDFG. Because of the definition of the read/write sets, + this usage of the temporary storage is not picked up and it is only considered + as write set. + + If `diff_in_out` is true, then a different storage container, which is classified + as output, is used as temporary storage. + + This test was added during [PR#1678](https://github.com/spcl/dace/pull/1678). + """ + + def _make_nested_sdfg(diff_in_out: bool) -> dace.SDFG: + sdfg = dace.SDFG("inner_sdfg") + state = sdfg.add_state(is_start_block=True) + sdfg.add_array("A", dtype=dace.float64, shape=(2,), transient=False) + sdfg.add_array("T1", dtype=dace.float64, shape=(2,), transient=False) + + A = state.add_access("A") + T1_input = state.add_access("T1") + if diff_in_out: + sdfg.add_array("T2", dtype=dace.float64, shape=(2,), transient=False) + T1_output = state.add_access("T2") + else: + T1_output = state.add_access("T1") + + tsklt = state.add_tasklet( + "comp", + inputs={"__in1": None, "__in2": None}, + outputs={"__out": None}, + code="__out = __in1 + __in2", + ) + + state.add_edge(A, None, tsklt, "__in1", dace.Memlet("A[1]")) + # An alternative would be to write to a different location here. + # Then, the data would be added to the access node. + state.add_edge(A, None, T1_input, None, dace.Memlet("A[0] -> [0]")) + state.add_edge(T1_input, None, tsklt, "__in2", dace.Memlet("T1[0]")) + state.add_edge(tsklt, "__out", T1_output, None, dace.Memlet(T1_output.data + "[1]")) + return sdfg + + sdfg = dace.SDFG("Parent_SDFG") + state = sdfg.add_state(is_start_block=True) + + sdfg.add_array("A", dtype=dace.float64, shape=(2,), transient=False) + sdfg.add_array("T1", dtype=dace.float64, shape=(2,), transient=False) + sdfg.add_array("T2", dtype=dace.float64, shape=(2,), transient=False) + A = state.add_access("A") + T1 = state.add_access("T1") + + nested_sdfg = _make_nested_sdfg(diff_in_out) + + nsdfg = state.add_nested_sdfg( + nested_sdfg, + parent=sdfg, + inputs={"A"}, + outputs={"T2", "T1"} if diff_in_out else {"T1"}, + symbol_mapping={}, + ) + + state.add_edge(A, None, nsdfg, "A", dace.Memlet("A[0:2]")) + state.add_edge(nsdfg, "T1", T1, None, dace.Memlet("T1[0:2]")) + + if diff_in_out: + state.add_edge(nsdfg, "T2", state.add_access("T2"), None, dace.Memlet("T2[0:2]")) + sdfg.validate() + return sdfg + + +def test_rna_read_and_write_sets_doule_use(): + """ + NOTE: Under the current definition of the read/write sets this test will fail. + """ + + # The output is used also as temporary storage. + sdfg = _make_rna_read_and_write_set_sdfg(False) + nb_applied = sdfg.apply_transformations_repeated( + [RefineNestedAccess], + validate=True, + validate_all=True, + ) + assert nb_applied > 0 + + +def test_rna_read_and_write_sets_different_storage(): + + # There is a dedicated temporary storage used. + sdfg = _make_rna_read_and_write_set_sdfg(True) + + nb_applied = sdfg.apply_transformations_repeated( + [RefineNestedAccess], + validate=True, + validate_all=True, + ) + assert nb_applied > 0 + + if __name__ == '__main__': test_refine_dataflow() test_refine_interstate() test_free_symbols_only_by_indices() + test_rna_read_and_write_sets_different_storage() + test_rna_read_and_write_sets_doule_use() From b546b0742104464de08a3ee2840c073be2a52605 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Tue, 22 Oct 2024 14:50:55 +0200 Subject: [PATCH 10/17] I now removed the filtering inside the read and write set. This is _just_ a proposal. --- dace/sdfg/state.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index 53f8d98491..09e7607d65 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -761,13 +761,15 @@ def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr, read_set = collections.defaultdict(list) write_set = collections.defaultdict(list) + # NOTE: In a previous version a _single_ read (i.e. leaving Memlet) that was + # fully covered by a single write (i.e. an incoming Memlet) was removed from + # the read set and only the write survived. However, this was never fully + # implemented nor correctly implemented and caused problems. + # So this filtering was removed. + for subgraph in utils.concurrent_subgraphs(self): subgraph_read_set = collections.defaultdict(list) # read and write set of this subgraph. subgraph_write_set = collections.defaultdict(list) - # Traverse in topological order, so data that is written before it - # is read is not counted in the read set - # NOTE: Each AccessNode is processed individually. Thus, if an array appears multiple - # times in a path, the individual results are combined, without further processing. for n in utils.dfs_topological_sort(subgraph, sources=subgraph.source_nodes()): if not isinstance(n, nd.AccessNode): # Read and writes can only be done through access nodes, @@ -805,18 +807,6 @@ def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr, else out_edge.data.src_subset ) - # If a memlet reads a particular region of data from the access node and there - # exists a memlet at the same access node that writes to the same region, then - # this read is ignored, and not included in the final read set, but only - # accounted fro in the write set. See also note below. - # TODO: Handle the case when multiple disjoint writes would be needed to cover the - # read. E.g. edges write `0:10` and `10:20` but the read happens at `5:15`. - for out_edge in list(out_edges): - for in_edge in in_edges: - if in_subsets[in_edge].covers(out_subsets[out_edge]): - out_edges.remove(out_edge) - break - # Update the read and write sets of the subgraph. if in_edges: subgraph_write_set[n.data].extend(in_subsets.values()) From ae205909ca3a78cfea7203dc037461f30d08ceb8 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Wed, 23 Oct 2024 09:32:00 +0200 Subject: [PATCH 11/17] Fixed `state_test.py::test_read_and_write_set_filter`. Because of the removed filtering, `B` is now part of the read set as well. --- tests/sdfg/state_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py index 33e02088a4..7a6894c8ac 100644 --- a/tests/sdfg/state_test.py +++ b/tests/sdfg/state_test.py @@ -87,6 +87,7 @@ def test_read_and_write_set_filter(): expected_reads = { "A": [sbs.Range.from_string("0, 0")], + "B": [sbs.Range.from_string("0")], } expected_writes = { "B": [sbs.Range.from_string("0")], From db211fa169732e87c899215ebccbbbd3bd583c03 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Wed, 23 Oct 2024 09:35:13 +0200 Subject: [PATCH 12/17] Fixed the `state_test.py::test_read_write_set` test. Because of the removed filtering `B` is no longer removed from the read set. However, because now the test has become quite useless. We also see that the test was specifically constructed in such a way that the filtering applies. Otherwise it would never triggered. --- tests/sdfg/state_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py index 7a6894c8ac..efc11cc844 100644 --- a/tests/sdfg/state_test.py +++ b/tests/sdfg/state_test.py @@ -20,7 +20,9 @@ def test_read_write_set(): state.add_memlet_path(rw_b, task2, dst_conn='B', memlet=dace.Memlet('B[2]')) state.add_memlet_path(task2, write_c, src_conn='C', memlet=dace.Memlet('C[2]')) - assert 'B' not in state.read_and_write_sets()[0] + read_set, write_set = state.read_and_write_sets() + assert {'B', 'A'} == read_set + assert {'C', 'B'} == write_set def test_read_write_set_y_formation(): From 570437b18ac3a780c0150fbe923babd61f354cb4 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Wed, 23 Oct 2024 09:40:45 +0200 Subject: [PATCH 13/17] Fixed the `state_test.py::test_read_write_set_y_formation` test. Because of the removed filtering `B` is no longer removed from the read set. However, because now the test has become quite useless. We also see that the test was specifically constructed in such a way that the filtering applies. Otherwise it would never triggered. --- tests/sdfg/state_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py index efc11cc844..4bde3788e0 100644 --- a/tests/sdfg/state_test.py +++ b/tests/sdfg/state_test.py @@ -44,7 +44,9 @@ def test_read_write_set_y_formation(): state.add_memlet_path(rw_b, task2, dst_conn='B', memlet=dace.Memlet(data='B', subset='0')) state.add_memlet_path(task2, write_c, src_conn='C', memlet=dace.Memlet(data='C', subset='0')) - assert 'B' not in state.read_and_write_sets()[0] + read_set, write_set = state.read_and_write_sets() + assert {'B', 'A'} == read_set + assert {'C', 'B'} == write_set def test_deepcopy_state(): From cb80f0b3288cedd7ea89acb035cae7873f7afe2f Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Wed, 23 Oct 2024 14:34:04 +0200 Subject: [PATCH 14/17] Fixed `move_loop_into_map_test.py::MoveLoopIntoMapTest::test_more_than_a_map`. Because the filtering is now not applied, like it was originally, the transformation does no longer apply. However, this is the historical expected behaviour. --- .../move_loop_into_map_test.py | 24 ++++--------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/tests/transformations/move_loop_into_map_test.py b/tests/transformations/move_loop_into_map_test.py index fbb05d30f5..ad51941cb0 100644 --- a/tests/transformations/move_loop_into_map_test.py +++ b/tests/transformations/move_loop_into_map_test.py @@ -150,7 +150,9 @@ def test_apply_multiple_times_1(self): def test_more_than_a_map(self): """ `out` is read and written indirectly by the MapExit, potentially leading to a RW dependency. - However, there is no dependency. + + Note that there is actually no dependency, however, the transformation, because it relies + on `SDFGState.read_and_write_sets()` it can not detect this and can thus not be applied. """ sdfg = dace.SDFG('more_than_a_map') _, aarr = sdfg.add_array('A', (3, 3), dace.float64) @@ -175,26 +177,8 @@ def test_more_than_a_map(self): body.add_nedge(twrite, owrite, dace.Memlet.from_array('out', oarr)) sdfg.add_loop(None, body, None, '_', '0', '_ < 10', '_ + 1') - sdfg_args_ref = { - "A": np.array(np.random.rand(3, 3), dtype=np.float64), - "B": np.array(np.random.rand(3, 3), dtype=np.float64), - "out": np.array(np.random.rand(3, 3), dtype=np.float64), - } - sdfg_args_res = copy.deepcopy(sdfg_args_ref) - - # Perform the reference execution - sdfg(**sdfg_args_ref) - - # Apply the transformation and execute the SDFG again. count = sdfg.apply_transformations(MoveLoopIntoMap, validate_all=True, validate=True) - sdfg(**sdfg_args_res) - - for name in sdfg_args_ref.keys(): - self.assertTrue( - np.allclose(sdfg_args_ref[name], sdfg_args_res[name]), - f"Miss match for {name}", - ) - self.assertTrue(count > 0) + self.assertTrue(count == 0) def test_more_than_a_map_1(self): """ From b704a4378cd8b03a1d1e5469458118c2fd70ce2d Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Wed, 23 Oct 2024 14:39:03 +0200 Subject: [PATCH 15/17] Fixed `prune_connectors_test.py::test_read_write_*`. Because of the removed filtering the transformat can no longer apply. However, I originally added these tests to demonstrate this inconsistent behaviour in the first place. So removing them is now the right choice. This commit also combines them to `prune_connectors_test.py::test_read_write`. --- .../transformations/prune_connectors_test.py | 25 +++---------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/tests/transformations/prune_connectors_test.py b/tests/transformations/prune_connectors_test.py index 9995fbd305..b7b287d77e 100644 --- a/tests/transformations/prune_connectors_test.py +++ b/tests/transformations/prune_connectors_test.py @@ -331,16 +331,6 @@ def test_unused_retval_2(): assert np.allclose(a, 1) -def test_read_write_1(): - # Because the memlet is conforming, we can apply the transformation. - sdfg = _make_read_write_sdfg(True) - - assert first_mode == PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=osdfg, expr_index=0, permissive=False) - - - - - def test_prune_connectors_with_dependencies(): sdfg = dace.SDFG('tester') A, A_desc = sdfg.add_array('A', [4], dace.float64) @@ -419,21 +409,12 @@ def test_prune_connectors_with_dependencies(): assert np.allclose(np_d, np_d_) -def test_read_write_1(): +def test_read_write(): sdfg, nsdfg = _make_read_write_sdfg(True) + assert not PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False) - assert PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False) - sdfg.apply_transformations_repeated(PruneConnectors, validate=True, validate_all=True) - - -def test_read_write_2(): - # In previous versions of DaCe the transformation could not be applied in the - # case of a not conforming Memlet. - # See [PR#1678](https://github.com/spcl/dace/pull/1678) sdfg, nsdfg = _make_read_write_sdfg(False) - - assert PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False) - sdfg.apply_transformations_repeated(PruneConnectors, validate=True, validate_all=True) + assert not PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False) if __name__ == "__main__": From f74d6e81f979483ce849606f5911c9ece3adec3f Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Wed, 23 Oct 2024 14:46:27 +0200 Subject: [PATCH 16/17] General improvements to some tests. Ensured that the return value is always accassable in the same way. Also ensured that the `test_rna_read_and_write_sets_different_storage()` test verifies that it still gives the same result. --- .../refine_nested_access_test.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/transformations/refine_nested_access_test.py b/tests/transformations/refine_nested_access_test.py index 94c02a88de..38e17ad3af 100644 --- a/tests/transformations/refine_nested_access_test.py +++ b/tests/transformations/refine_nested_access_test.py @@ -177,12 +177,12 @@ def _make_nested_sdfg(diff_in_out: bool) -> dace.SDFG: sdfg.add_array("T1", dtype=dace.float64, shape=(2,), transient=False) A = state.add_access("A") - T1_input = state.add_access("T1") + T1_output = state.add_access("T1") if diff_in_out: sdfg.add_array("T2", dtype=dace.float64, shape=(2,), transient=False) - T1_output = state.add_access("T2") + T1_input = state.add_access("T2") else: - T1_output = state.add_access("T1") + T1_input = state.add_access("T1") tsklt = state.add_tasklet( "comp", @@ -195,7 +195,7 @@ def _make_nested_sdfg(diff_in_out: bool) -> dace.SDFG: # An alternative would be to write to a different location here. # Then, the data would be added to the access node. state.add_edge(A, None, T1_input, None, dace.Memlet("A[0] -> [0]")) - state.add_edge(T1_input, None, tsklt, "__in2", dace.Memlet("T1[0]")) + state.add_edge(T1_input, None, tsklt, "__in2", dace.Memlet(T1_input.data + "[0]")) state.add_edge(tsklt, "__out", T1_output, None, dace.Memlet(T1_output.data + "[1]")) return sdfg @@ -254,6 +254,16 @@ def test_rna_read_and_write_sets_different_storage(): ) assert nb_applied > 0 + args = { + "A": np.array(np.random.rand(2), dtype=np.float64, copy=True), + "T2": np.array(np.random.rand(2), dtype=np.float64, copy=True), + "T1": np.zeros(2, dtype=np.float64), + } + ref = args["A"][0] + args["A"][1] + sdfg(**args) + res = args["T1"][1] + assert np.allclose(res, ref), f"Expected '{ref}' but got '{res}'." + if __name__ == '__main__': test_refine_dataflow() From e1039243d847f964bdec5ced1b2cdebeb1c977c5 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Wed, 23 Oct 2024 14:50:14 +0200 Subject: [PATCH 17/17] Updated `refine_nested_access_test.py::test_rna_read_and_write_sets_doule_use` I realized that the transformation should not apply. The reason is because of all arguments to the nested SDFG element `0` is accessed. This means it can not be adjusted. --- tests/transformations/refine_nested_access_test.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/transformations/refine_nested_access_test.py b/tests/transformations/refine_nested_access_test.py index 38e17ad3af..81640665ed 100644 --- a/tests/transformations/refine_nested_access_test.py +++ b/tests/transformations/refine_nested_access_test.py @@ -228,18 +228,15 @@ def _make_nested_sdfg(diff_in_out: bool) -> dace.SDFG: def test_rna_read_and_write_sets_doule_use(): - """ - NOTE: Under the current definition of the read/write sets this test will fail. - """ - - # The output is used also as temporary storage. + # The transformation does not apply because we access element `0` of both arrays that we + # pass inside the nested SDFG. sdfg = _make_rna_read_and_write_set_sdfg(False) nb_applied = sdfg.apply_transformations_repeated( [RefineNestedAccess], validate=True, validate_all=True, ) - assert nb_applied > 0 + assert nb_applied == 0 def test_rna_read_and_write_sets_different_storage():