From 145c0ea5bfb9647d95625b3612f5da65ac312889 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Fri, 11 Oct 2024 08:37:57 +0200
Subject: [PATCH 01/17] Added tests for the `_read_and_write_sets()`.

---
 tests/sdfg/state_test.py | 90 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py
index 7ba43ac4c0..1fac1d56f4 100644
--- a/tests/sdfg/state_test.py
+++ b/tests/sdfg/state_test.py
@@ -1,5 +1,6 @@
 # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
 import dace
+from dace import subsets as sbs
 from dace.transformation.helpers import find_sdfg_control_flow
 
 
@@ -43,6 +44,7 @@ def test_read_write_set_y_formation():
 
     assert 'B' not in state.read_and_write_sets()[0]
 
+
 def test_deepcopy_state():
     N = dace.symbol('N')
 
@@ -58,6 +60,92 @@ def double_loop(arr: dace.float32[N]):
     sdfg.validate()
 
 
+def test_read_and_write_set_filter():
+    sdfg = dace.SDFG('graph')
+    state = sdfg.add_state('state')
+    sdfg.add_array('A', [2, 2], dace.float64)
+    sdfg.add_scalar('B', dace.float64)
+    sdfg.add_array('C', [2, 2], dace.float64)
+    A, B, C = (state.add_access(name) for name in ('A', 'B', 'C'))
+
+    state.add_nedge(
+            A,
+            B,
+            dace.Memlet("B[0] -> 0, 0"),
+    )
+    state.add_nedge(
+            B,
+            C,
+            # If the Memlet would be `B[0] -> 1, 1` it would then be filtered out.
+            #   This is an intentional behaviour for compatibility.
+            dace.Memlet("C[1, 1] -> 0"),
+    )
+    state.add_nedge(
+            B,
+            C,
+            dace.Memlet("B[0] -> 0, 0"),
+    )
+    sdfg.validate()
+
+    expected_reads = {
+            "A": [sbs.Range.from_string("0, 0")],
+            # See comment in `state._read_and_write_sets()` why "B" is here
+            #   it should actually not, but it is a bug.
+            "B": [sbs.Range.from_string("0")],
+    }
+    expected_writes = {
+            # However, this should always be here.
+            "B": [sbs.Range.from_string("0")],
+            "C": [sbs.Range.from_string("0, 0"), sbs.Range.from_string("1, 1")],
+    }
+    read_set, write_set = state._read_and_write_sets()
+
+    for expected_sets, computed_sets in [(expected_reads, read_set), (expected_writes, write_set)]:
+        assert expected_sets.keys() == computed_sets.keys(), f"Expected the set to contain '{expected_sets.keys()}' but got '{computed_sets.keys()}'."
+        for access_data in expected_sets.keys():
+            for exp in expected_sets[access_data]:
+                found_match = False
+                for res in computed_sets[access_data]:
+                    if res == exp:
+                        found_match = True
+                        break
+                assert found_match, f"Could not find the subset '{exp}' only got '{computed_sets}'"
+
+
+def test_read_and_write_set_selection():
+    sdfg = dace.SDFG('graph')
+    state = sdfg.add_state('state')
+    sdfg.add_array('A', [2, 2], dace.float64)
+    sdfg.add_scalar('B', dace.float64)
+    A, B = (state.add_access(name) for name in ('A', 'B'))
+
+    state.add_nedge(
+            A,
+            B,
+            dace.Memlet("A[0, 0]"),
+    )
+    sdfg.validate()
+
+    expected_reads = {
+            "A": [sbs.Range.from_string("0, 0")],
+    }
+    expected_writes = {
+            "B": [sbs.Range.from_string("0")],
+    }
+    read_set, write_set = state._read_and_write_sets()
+
+    for expected_sets, computed_sets in [(expected_reads, read_set), (expected_writes, write_set)]:
+        assert expected_sets.keys() == computed_sets.keys(), f"Expected the set to contain '{expected_sets.keys()}' but got '{computed_sets.keys()}'."
+        for access_data in expected_sets.keys():
+            for exp in expected_sets[access_data]:
+                found_match = False
+                for res in computed_sets[access_data]:
+                    if res == exp:
+                        found_match = True
+                        break
+                assert found_match, f"Could not find the subset '{exp}' only got '{computed_sets}'"
+
+
 def test_add_mapped_tasklet():
     sdfg = dace.SDFG("test_add_mapped_tasklet")
     state = sdfg.add_state(is_start_block=True)
@@ -82,6 +170,8 @@ def test_add_mapped_tasklet():
 
 
 if __name__ == '__main__':
+    test_read_and_write_set_selection()
+    test_read_and_write_set_filter()
     test_read_write_set()
     test_read_write_set_y_formation()
     test_deepcopy_state()

From 38e748bfb2576103c0cf490fcb3f77a9c20361d9 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Fri, 11 Oct 2024 08:49:42 +0200
Subject: [PATCH 02/17] Added the fix from my MapFusion PR.

---
 dace/sdfg/state.py | 111 +++++++++++++++++++++++++++++++--------------
 1 file changed, 78 insertions(+), 33 deletions(-)

diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py
index 8d443e6beb..a9c99da7f9 100644
--- a/dace/sdfg/state.py
+++ b/dace/sdfg/state.py
@@ -745,51 +745,96 @@ def update_if_not_none(dic, update):
 
         return defined_syms
 
+
     def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr, List[Subset]]]:
         """
         Determines what data is read and written in this subgraph, returning
         dictionaries from data containers to all subsets that are read/written.
         """
+        from dace.sdfg import utils  # Avoid cyclic import
+
+        # Ensures that the `{src,dst}_subset` are properly set.
+        #  TODO: find where the problems are
+        for edge in self.edges():
+            edge.data.try_initialize(self.sdfg, self, edge)
+
         read_set = collections.defaultdict(list)
         write_set = collections.defaultdict(list)
-        from dace.sdfg import utils  # Avoid cyclic import
-        subgraphs = utils.concurrent_subgraphs(self)
-        for sg in subgraphs:
-            rs = collections.defaultdict(list)
-            ws = collections.defaultdict(list)
+
+        for subgraph in utils.concurrent_subgraphs(self):
+            subgraph_read_set = collections.defaultdict(list)  # read and write set of this subgraph.
+            subgraph_write_set = collections.defaultdict(list)
             # Traverse in topological order, so data that is written before it
             # is read is not counted in the read set
-            for n in utils.dfs_topological_sort(sg, sources=sg.source_nodes()):
-                if isinstance(n, nd.AccessNode):
-                    in_edges = sg.in_edges(n)
-                    out_edges = sg.out_edges(n)
-                    # Filter out memlets which go out but the same data is written to the AccessNode by another memlet
-                    for out_edge in list(out_edges):
-                        for in_edge in list(in_edges):
-                            if (in_edge.data.data == out_edge.data.data
-                                    and in_edge.data.dst_subset.covers(out_edge.data.src_subset)):
-                                out_edges.remove(out_edge)
-                                break
-
-                    for e in in_edges:
-                        # skip empty memlets
-                        if e.data.is_empty():
-                            continue
-                        # Store all subsets that have been written
-                        ws[n.data].append(e.data.subset)
-                    for e in out_edges:
-                        # skip empty memlets
-                        if e.data.is_empty():
+            # TODO: This only works if every data descriptor is only once in a path.
+            for n in utils.dfs_topological_sort(subgraph, sources=subgraph.source_nodes()):
+                if not isinstance(n, nd.AccessNode):
+                    # Read and writes can only be done through access nodes,
+                    #  so ignore every other node.
+                    continue
+
+                # Get a list of all incoming (writes) and outgoing (reads) edges of the
+                #  access node, ignore all empty memlets as they do not carry any data.
+                in_edges = [in_edge for in_edge in subgraph.in_edges(n) if not in_edge.data.is_empty()]
+                out_edges = [out_edge for out_edge in subgraph.out_edges(n) if not out_edge.data.is_empty()]
+
+                # Extract the subsets that describes where we read and write the data
+                #  and store them for the later filtering.
+                # NOTE: In certain cases the corresponding subset might be None, in this case
+                #   we assume that the whole array is written, which is the default behaviour.
+                ac_desc = n.desc(self.sdfg)
+                ac_size = ac_desc.total_size
+                in_subsets = dict()
+                for in_edge in in_edges:
+                    # Ensure that if the destination subset is not given, our assumption, that the
+                    #  whole array is written to, is valid, by testing if the memlet transfers the
+                    #  whole array.
+                    assert (in_edge.data.dst_subset is not None) or (in_edge.data.num_elements() == ac_size)
+                    in_subsets[in_edge] = (
+                            sbs.Range.from_array(ac_desc)
+                            if in_edge.data.dst_subset is None
+                            else in_edge.data.dst_subset
+                    )
+                out_subsets = dict()
+                for out_edge in out_edges:
+                    assert (out_edge.data.src_subset is not None) or (out_edge.data.num_elements() == ac_size)
+                    out_subsets[out_edge] = (
+                        sbs.Range.from_array(ac_desc)
+                        if out_edge.data.src_subset is None
+                        else out_edge.data.src_subset
+                    )
+
+                # If a memlet reads a particular region of data from the access node and there
+                #  exists a memlet at the same access node that writes to the same region, then
+                #  this read is ignored, and not included in the final read set, but only
+                #  accounted fro in the write set. See also note below.
+                # TODO: Handle the case when multiple disjoint writes are needed to cover the read.
+                for out_edge in list(out_edges):
+                    for in_edge in in_edges:
+                        if out_edge.data.data != in_edge.data.data:
+                            # NOTE: This check does not make any sense, and is in my (@philip-paul-mueller)
+                            #   view wrong. As it will filter out some accesses but not all, which one solely
+                            #   depends on how the memelts were created, i.e. to which container their `data`
+                            #   attribute is associated to. See also [issue #1643](https://github.com/spcl/dace/issues/1643).
                             continue
-                        rs[n.data].append(e.data.subset)
-            # Union all subgraphs, so an array that was excluded from the read
-            # set because it was written first is still included if it is read
-            # in another subgraph
-            for data, accesses in rs.items():
+                        if in_subsets[in_edge].covers(out_subsets[out_edge]):
+                            out_edges.remove(out_edge)
+                            break
+
+                # Update the read and write sets of the subgraph.
+                if in_edges:
+                    subgraph_write_set[n.data].extend(in_subsets.values())
+                if out_edges:
+                    subgraph_read_set[n.data].extend(out_subsets[out_edge] for out_edge in out_edges)
+
+            # Add the subgraph's read and write set to the final ones.
+            for data, accesses in subgraph_read_set.items():
                 read_set[data] += accesses
-            for data, accesses in ws.items():
+            for data, accesses in subgraph_write_set.items():
                 write_set[data] += accesses
-        return read_set, write_set
+
+        return copy.deepcopy((read_set, write_set))
+
 
     def read_and_write_sets(self) -> Tuple[Set[AnyStr], Set[AnyStr]]:
         """

From 3748c03cd863c3f5241ab5e0e03164d3c6c61319 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Fri, 11 Oct 2024 08:55:51 +0200
Subject: [PATCH 03/17] Now made `read_and_write_sets()` fully adhere to their
 own definition.

---
 dace/sdfg/state.py       | 6 ------
 tests/sdfg/state_test.py | 6 ------
 2 files changed, 12 deletions(-)

diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py
index a9c99da7f9..ba853d088d 100644
--- a/dace/sdfg/state.py
+++ b/dace/sdfg/state.py
@@ -811,12 +811,6 @@ def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr,
                 # TODO: Handle the case when multiple disjoint writes are needed to cover the read.
                 for out_edge in list(out_edges):
                     for in_edge in in_edges:
-                        if out_edge.data.data != in_edge.data.data:
-                            # NOTE: This check does not make any sense, and is in my (@philip-paul-mueller)
-                            #   view wrong. As it will filter out some accesses but not all, which one solely
-                            #   depends on how the memelts were created, i.e. to which container their `data`
-                            #   attribute is associated to. See also [issue #1643](https://github.com/spcl/dace/issues/1643).
-                            continue
                         if in_subsets[in_edge].covers(out_subsets[out_edge]):
                             out_edges.remove(out_edge)
                             break
diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py
index 1fac1d56f4..7d74ae6bcc 100644
--- a/tests/sdfg/state_test.py
+++ b/tests/sdfg/state_test.py
@@ -76,8 +76,6 @@ def test_read_and_write_set_filter():
     state.add_nedge(
             B,
             C,
-            # If the Memlet would be `B[0] -> 1, 1` it would then be filtered out.
-            #   This is an intentional behaviour for compatibility.
             dace.Memlet("C[1, 1] -> 0"),
     )
     state.add_nedge(
@@ -89,12 +87,8 @@ def test_read_and_write_set_filter():
 
     expected_reads = {
             "A": [sbs.Range.from_string("0, 0")],
-            # See comment in `state._read_and_write_sets()` why "B" is here
-            #   it should actually not, but it is a bug.
-            "B": [sbs.Range.from_string("0")],
     }
     expected_writes = {
-            # However, this should always be here.
             "B": [sbs.Range.from_string("0")],
             "C": [sbs.Range.from_string("0, 0"), sbs.Range.from_string("1, 1")],
     }

From 3ab4bf3e379ea96fbb7635904cc8b320215fc558 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Fri, 11 Oct 2024 11:27:47 +0200
Subject: [PATCH 04/17] Updated a test for the `PruneConnectors`
 transformation.

Before in this case the transformation was not able to be applied.
The reason was because of the behaviour of the `SDFGState.read_and_write_sets()` function.
However, now with the fix of the [PR#1678](https://github.com/spcl/dace/pull/1678) the transformation became applicable.
---
 tests/transformations/prune_connectors_test.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/transformations/prune_connectors_test.py b/tests/transformations/prune_connectors_test.py
index 4026ec3e1c..8ec0d3615a 100644
--- a/tests/transformations/prune_connectors_test.py
+++ b/tests/transformations/prune_connectors_test.py
@@ -153,7 +153,6 @@ def _make_read_write_sdfg(
 
     Depending on `conforming_memlet` the memlet that copies `inner_A` into `inner_B`
     will either be associated to `inner_A` (`True`) or `inner_B` (`False`).
-    This choice has consequences on if the transformation can apply or not.
 
     Notes:
         This is most likely a bug, see [issue#1643](https://github.com/spcl/dace/issues/1643),
@@ -421,7 +420,6 @@ def test_prune_connectors_with_dependencies():
 
 
 def test_read_write_1():
-    # Because the memlet is conforming, we can apply the transformation.
     sdfg, nsdfg = _make_read_write_sdfg(True)
 
     assert PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False)
@@ -429,10 +427,13 @@ def test_read_write_1():
 
 
 def test_read_write_2():
-    # Because the memlet is not conforming, we can not apply the transformation.
+    # In previous versions of DaCe the transformation could not be applied in the
+    #  case of a not conforming Memlet.
+    #  See [PR#1678](https://github.com/spcl/dace/pull/1678)
     sdfg, nsdfg = _make_read_write_sdfg(False)
 
-    assert not PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False)
+    assert PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False)
+    sdfg.apply_transformations_repeated(PruneConnectors, validate=True, validate_all=True)
 
 
 if __name__ == "__main__":

From b4feddfe8459dcb49e46ef416512e022624d2e60 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Fri, 11 Oct 2024 13:18:49 +0200
Subject: [PATCH 05/17] Added code to `test_more_than_a_map` to ensure that the
 transformation does not change the behaviour of teh output.

---
 .../move_loop_into_map_test.py                | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/transformations/move_loop_into_map_test.py b/tests/transformations/move_loop_into_map_test.py
index dca775bb7a..de610d0ca8 100644
--- a/tests/transformations/move_loop_into_map_test.py
+++ b/tests/transformations/move_loop_into_map_test.py
@@ -2,6 +2,7 @@
 import dace
 from dace.transformation.interstate import MoveLoopIntoMap
 import unittest
+import copy
 import numpy as np
 
 I = dace.symbol("I")
@@ -170,7 +171,26 @@ def test_more_than_a_map(self):
         body.add_nedge(aread, oread, dace.Memlet.from_array('A', aarr))
         body.add_nedge(twrite, owrite, dace.Memlet.from_array('out', oarr))
         sdfg.add_loop(None, body, None, '_', '0', '_ < 10', '_ + 1')
-        count = sdfg.apply_transformations(MoveLoopIntoMap)
+
+        sdfg_args_ref = {
+            "A": np.array(np.random.rand(3, 3), dtype=np.float64),
+            "B": np.array(np.random.rand(3, 3), dtype=np.float64),
+            "out": np.array(np.random.rand(3, 3), dtype=np.float64),
+        }
+        sdfg_args_res = copy.deepcopy(sdfg_args_ref)
+
+        # Perform the reference execution
+        sdfg(**sdfg_args_ref)
+
+        # Apply the transformation and execute the SDFG again.
+        count = sdfg.apply_transformations(MoveLoopIntoMap, validate_all=True, validate=True)
+        sdfg(**sdfg_args_res)
+
+        for name in sdfg_args_ref.keys():
+            self.assertTrue(
+                np.allclose(sdfg_args_ref[name], sdfg_args_res[name]),
+                f"Miss match for {name}",
+            )
         self.assertFalse(count > 0)
 
     def test_more_than_a_map_1(self):

From 70fa3db43020073293dca5bdf9d2bf6649c93814 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Mon, 14 Oct 2024 09:29:58 +0200
Subject: [PATCH 06/17] Added the new memlet creation syntax.

---
 tests/sdfg/state_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py
index 7d74ae6bcc..33e02088a4 100644
--- a/tests/sdfg/state_test.py
+++ b/tests/sdfg/state_test.py
@@ -71,17 +71,17 @@ def test_read_and_write_set_filter():
     state.add_nedge(
             A,
             B,
-            dace.Memlet("B[0] -> 0, 0"),
+            dace.Memlet("B[0] -> [0, 0]"),
     )
     state.add_nedge(
             B,
             C,
-            dace.Memlet("C[1, 1] -> 0"),
+            dace.Memlet("C[1, 1] -> [0]"),
     )
     state.add_nedge(
             B,
             C,
-            dace.Memlet("B[0] -> 0, 0"),
+            dace.Memlet("B[0] -> [0, 0]"),
     )
     sdfg.validate()
 

From b187a8260ba197280b7b1c3cd8075a6a0f47a810 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Mon, 14 Oct 2024 13:15:13 +0200
Subject: [PATCH 07/17] Modified some comments to make them clearer.

---
 dace/sdfg/state.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py
index 855cf83f62..53f8d98491 100644
--- a/dace/sdfg/state.py
+++ b/dace/sdfg/state.py
@@ -766,7 +766,8 @@ def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr,
             subgraph_write_set = collections.defaultdict(list)
             # Traverse in topological order, so data that is written before it
             # is read is not counted in the read set
-            # TODO: This only works if every data descriptor is only once in a path.
+            # NOTE: Each AccessNode is processed individually. Thus, if an array appears multiple
+            #   times in a path, the individual results are combined, without further processing.
             for n in utils.dfs_topological_sort(subgraph, sources=subgraph.source_nodes()):
                 if not isinstance(n, nd.AccessNode):
                     # Read and writes can only be done through access nodes,
@@ -808,7 +809,8 @@ def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr,
                 #  exists a memlet at the same access node that writes to the same region, then
                 #  this read is ignored, and not included in the final read set, but only
                 #  accounted fro in the write set. See also note below.
-                # TODO: Handle the case when multiple disjoint writes are needed to cover the read.
+                # TODO: Handle the case when multiple disjoint writes would be needed to cover the
+                #   read. E.g. edges write `0:10` and `10:20` but the read happens at `5:15`.
                 for out_edge in list(out_edges):
                     for in_edge in in_edges:
                         if in_subsets[in_edge].covers(out_subsets[out_edge]):

From 9c6cb6c91cd924cb9269c727192778106fe36a17 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Mon, 14 Oct 2024 13:15:48 +0200
Subject: [PATCH 08/17] Modified the
 `tests/transformations/move_loop_into_map_test.py::test_more_than_a_map()`
 test.

The test now assumes that it can be applied, as it was discussed on github this should be the correct behaviour.
Furthermore, there is also a test which ensures that the same values are computed.

This commit also adds an additional test (``tests/transformations/move_loop_into_map_test.py::test_more_than_a_map_4()`) which is essentially the same.
However, in one of its Memlet it is a bit different and therefore the transformation can not be applied.
---
 .../move_loop_into_map_test.py                | 58 ++++++++++++++++++-
 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/tests/transformations/move_loop_into_map_test.py b/tests/transformations/move_loop_into_map_test.py
index de610d0ca8..fbb05d30f5 100644
--- a/tests/transformations/move_loop_into_map_test.py
+++ b/tests/transformations/move_loop_into_map_test.py
@@ -148,7 +148,10 @@ def test_apply_multiple_times_1(self):
         self.assertTrue(np.allclose(val, ref))
 
     def test_more_than_a_map(self):
-        """ `out` is read and written indirectly by the MapExit, potentially leading to a RW dependency. """
+        """
+        `out` is read and written indirectly by the MapExit, potentially leading to a RW dependency.
+        However, there is no dependency.
+        """
         sdfg = dace.SDFG('more_than_a_map')
         _, aarr = sdfg.add_array('A', (3, 3), dace.float64)
         _, barr = sdfg.add_array('B', (3, 3), dace.float64)
@@ -168,7 +171,7 @@ def test_more_than_a_map(self):
                                 external_edges=True,
                                 input_nodes=dict(out=oread, B=bread),
                                 output_nodes=dict(tmp=twrite))
-        body.add_nedge(aread, oread, dace.Memlet.from_array('A', aarr))
+        body.add_nedge(aread, oread, dace.Memlet.from_array('A', oarr))
         body.add_nedge(twrite, owrite, dace.Memlet.from_array('out', oarr))
         sdfg.add_loop(None, body, None, '_', '0', '_ < 10', '_ + 1')
 
@@ -191,7 +194,7 @@ def test_more_than_a_map(self):
                 np.allclose(sdfg_args_ref[name], sdfg_args_res[name]),
                 f"Miss match for {name}",
             )
-        self.assertFalse(count > 0)
+        self.assertTrue(count > 0)
 
     def test_more_than_a_map_1(self):
         """
@@ -289,6 +292,55 @@ def test_more_than_a_map_3(self):
         count = sdfg.apply_transformations(MoveLoopIntoMap)
         self.assertFalse(count > 0)
 
+    def test_more_than_a_map_4(self):
+        """
+        The test is very similar to `test_more_than_a_map()`. But a memlet is different
+        which leads to a RW dependency, which blocks the transformation.
+        """
+        sdfg = dace.SDFG('more_than_a_map')
+        _, aarr = sdfg.add_array('A', (3, 3), dace.float64)
+        _, barr = sdfg.add_array('B', (3, 3), dace.float64)
+        _, oarr = sdfg.add_array('out', (3, 3), dace.float64)
+        _, tarr = sdfg.add_array('tmp', (3, 3), dace.float64, transient=True)
+        body = sdfg.add_state('map_state')
+        aread = body.add_access('A')
+        oread = body.add_access('out')
+        bread = body.add_access('B')
+        twrite = body.add_access('tmp')
+        owrite = body.add_access('out')
+        body.add_mapped_tasklet('op',
+                                dict(i='0:3', j='0:3'),
+                                dict(__in1=dace.Memlet('out[i, j]'), __in2=dace.Memlet('B[i, j]')),
+                                '__out = __in1 - __in2',
+                                dict(__out=dace.Memlet('tmp[i, j]')),
+                                external_edges=True,
+                                input_nodes=dict(out=oread, B=bread),
+                                output_nodes=dict(tmp=twrite))
+        body.add_nedge(aread, oread, dace.Memlet('A[Mod(_, 3), 0:3] -> [Mod(_ + 1, 3), 0:3]', aarr))
+        body.add_nedge(twrite, owrite, dace.Memlet.from_array('out', oarr))
+        sdfg.add_loop(None, body, None, '_', '0', '_ < 10', '_ + 1')
+
+        sdfg_args_ref = {
+            "A": np.array(np.random.rand(3, 3), dtype=np.float64),
+            "B": np.array(np.random.rand(3, 3), dtype=np.float64),
+            "out": np.array(np.random.rand(3, 3), dtype=np.float64),
+        }
+        sdfg_args_res = copy.deepcopy(sdfg_args_ref)
+
+        # Perform the reference execution
+        sdfg(**sdfg_args_ref)
+
+        # Apply the transformation and execute the SDFG again.
+        count = sdfg.apply_transformations(MoveLoopIntoMap, validate_all=True, validate=True)
+        sdfg(**sdfg_args_res)
+
+        for name in sdfg_args_ref.keys():
+            self.assertTrue(
+                np.allclose(sdfg_args_ref[name], sdfg_args_res[name]),
+                f"Miss match for {name}",
+            )
+        self.assertFalse(count > 0)
+
 
 if __name__ == '__main__':
     unittest.main()

From b7fe24219455e980b3c9f1ca6de06291f9dc986a Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Tue, 22 Oct 2024 14:43:50 +0200
Subject: [PATCH 09/17] Added a test to highlights the error.

Currently we only see the error in `tests/numpy/ufunc_support_test.py::test_ufunc_add_accumulate_simple` if the auto optimizations are enabled.
---
 .../refine_nested_access_test.py              | 101 ++++++++++++++++++
 1 file changed, 101 insertions(+)

diff --git a/tests/transformations/refine_nested_access_test.py b/tests/transformations/refine_nested_access_test.py
index d9fb9a7392..94c02a88de 100644
--- a/tests/transformations/refine_nested_access_test.py
+++ b/tests/transformations/refine_nested_access_test.py
@@ -156,7 +156,108 @@ def inner_sdfg(A: dace.int32[5], B: dace.int32[5, 5], idx_a: int, idx_b: int):
     assert np.allclose(ref, val)
 
 
+def _make_rna_read_and_write_set_sdfg(diff_in_out: bool) -> dace.SDFG:
+    """Generates the SDFG for the `test_rna_read_and_write_sets_*()` tests.
+
+    If `diff_in_out` is `False` then the output is also used as temporary storage
+    within the nested SDFG. Because of the definition of the read/write sets,
+    this usage of the temporary storage is not picked up and it is only considered
+    as write set.
+
+    If `diff_in_out` is true, then a different storage container, which is classified
+    as output, is used as temporary storage.
+
+    This test was added during [PR#1678](https://github.com/spcl/dace/pull/1678).
+    """
+
+    def _make_nested_sdfg(diff_in_out: bool) -> dace.SDFG:
+        sdfg = dace.SDFG("inner_sdfg")
+        state = sdfg.add_state(is_start_block=True)
+        sdfg.add_array("A", dtype=dace.float64, shape=(2,), transient=False)
+        sdfg.add_array("T1", dtype=dace.float64, shape=(2,), transient=False)
+
+        A = state.add_access("A")
+        T1_input = state.add_access("T1")
+        if diff_in_out:
+            sdfg.add_array("T2", dtype=dace.float64, shape=(2,), transient=False)
+            T1_output = state.add_access("T2")
+        else:
+            T1_output = state.add_access("T1")
+
+        tsklt = state.add_tasklet(
+            "comp",
+            inputs={"__in1": None, "__in2": None},
+            outputs={"__out": None},
+            code="__out = __in1 + __in2",
+        )
+
+        state.add_edge(A, None, tsklt, "__in1", dace.Memlet("A[1]"))
+        # An alternative would be to write to a different location here.
+        #  Then, the data would be added to the access node.
+        state.add_edge(A, None, T1_input, None, dace.Memlet("A[0] -> [0]"))
+        state.add_edge(T1_input, None, tsklt, "__in2", dace.Memlet("T1[0]"))
+        state.add_edge(tsklt, "__out", T1_output, None, dace.Memlet(T1_output.data + "[1]"))
+        return sdfg
+
+    sdfg = dace.SDFG("Parent_SDFG")
+    state = sdfg.add_state(is_start_block=True)
+    
+    sdfg.add_array("A", dtype=dace.float64, shape=(2,), transient=False)
+    sdfg.add_array("T1", dtype=dace.float64, shape=(2,), transient=False)
+    sdfg.add_array("T2", dtype=dace.float64, shape=(2,), transient=False)
+    A = state.add_access("A")
+    T1 = state.add_access("T1")
+
+    nested_sdfg = _make_nested_sdfg(diff_in_out)
+
+    nsdfg = state.add_nested_sdfg(
+        nested_sdfg,
+        parent=sdfg,
+        inputs={"A"},
+        outputs={"T2", "T1"} if diff_in_out else {"T1"},
+        symbol_mapping={},
+    )
+
+    state.add_edge(A, None, nsdfg, "A", dace.Memlet("A[0:2]"))
+    state.add_edge(nsdfg, "T1", T1, None, dace.Memlet("T1[0:2]"))
+
+    if diff_in_out:
+        state.add_edge(nsdfg, "T2", state.add_access("T2"), None, dace.Memlet("T2[0:2]"))
+    sdfg.validate()
+    return sdfg
+
+
+def test_rna_read_and_write_sets_doule_use():
+    """
+    NOTE: Under the current definition of the read/write sets this test will fail.
+    """
+
+    # The output is used also as temporary storage.
+    sdfg = _make_rna_read_and_write_set_sdfg(False)
+    nb_applied = sdfg.apply_transformations_repeated(
+        [RefineNestedAccess],
+        validate=True,
+        validate_all=True,
+    )
+    assert nb_applied > 0
+
+
+def test_rna_read_and_write_sets_different_storage():
+
+    # There is a dedicated temporary storage used.
+    sdfg = _make_rna_read_and_write_set_sdfg(True)
+
+    nb_applied = sdfg.apply_transformations_repeated(
+        [RefineNestedAccess],
+        validate=True,
+        validate_all=True,
+    )
+    assert nb_applied > 0
+
+
 if __name__ == '__main__':
     test_refine_dataflow()
     test_refine_interstate()
     test_free_symbols_only_by_indices()
+    test_rna_read_and_write_sets_different_storage()
+    test_rna_read_and_write_sets_doule_use()

From b546b0742104464de08a3ee2840c073be2a52605 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Tue, 22 Oct 2024 14:50:55 +0200
Subject: [PATCH 10/17] I now removed the filtering inside the read and write
 set.

This is _just_ a proposal.
---
 dace/sdfg/state.py | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py
index 53f8d98491..09e7607d65 100644
--- a/dace/sdfg/state.py
+++ b/dace/sdfg/state.py
@@ -761,13 +761,15 @@ def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr,
         read_set = collections.defaultdict(list)
         write_set = collections.defaultdict(list)
 
+        # NOTE: In a previous version a _single_ read (i.e. leaving Memlet) that was
+        #   fully covered by a single write (i.e. an incoming Memlet) was removed from
+        #   the read set and only the write survived. However, this was never fully
+        #   implemented nor correctly implemented and caused problems.
+        #   So this filtering was removed.
+
         for subgraph in utils.concurrent_subgraphs(self):
             subgraph_read_set = collections.defaultdict(list)  # read and write set of this subgraph.
             subgraph_write_set = collections.defaultdict(list)
-            # Traverse in topological order, so data that is written before it
-            # is read is not counted in the read set
-            # NOTE: Each AccessNode is processed individually. Thus, if an array appears multiple
-            #   times in a path, the individual results are combined, without further processing.
             for n in utils.dfs_topological_sort(subgraph, sources=subgraph.source_nodes()):
                 if not isinstance(n, nd.AccessNode):
                     # Read and writes can only be done through access nodes,
@@ -805,18 +807,6 @@ def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr,
                         else out_edge.data.src_subset
                     )
 
-                # If a memlet reads a particular region of data from the access node and there
-                #  exists a memlet at the same access node that writes to the same region, then
-                #  this read is ignored, and not included in the final read set, but only
-                #  accounted fro in the write set. See also note below.
-                # TODO: Handle the case when multiple disjoint writes would be needed to cover the
-                #   read. E.g. edges write `0:10` and `10:20` but the read happens at `5:15`.
-                for out_edge in list(out_edges):
-                    for in_edge in in_edges:
-                        if in_subsets[in_edge].covers(out_subsets[out_edge]):
-                            out_edges.remove(out_edge)
-                            break
-
                 # Update the read and write sets of the subgraph.
                 if in_edges:
                     subgraph_write_set[n.data].extend(in_subsets.values())

From ae205909ca3a78cfea7203dc037461f30d08ceb8 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Wed, 23 Oct 2024 09:32:00 +0200
Subject: [PATCH 11/17] Fixed `state_test.py::test_read_and_write_set_filter`.

Because of the removed filtering, `B` is now part of the read set as well.
---
 tests/sdfg/state_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py
index 33e02088a4..7a6894c8ac 100644
--- a/tests/sdfg/state_test.py
+++ b/tests/sdfg/state_test.py
@@ -87,6 +87,7 @@ def test_read_and_write_set_filter():
 
     expected_reads = {
             "A": [sbs.Range.from_string("0, 0")],
+            "B": [sbs.Range.from_string("0")],
     }
     expected_writes = {
             "B": [sbs.Range.from_string("0")],

From db211fa169732e87c899215ebccbbbd3bd583c03 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Wed, 23 Oct 2024 09:35:13 +0200
Subject: [PATCH 12/17] Fixed the `state_test.py::test_read_write_set` test.

Because of the removed filtering `B` is no longer removed from the read set.
However, because now the test has become quite useless.

We also see that the test was specifically constructed in such a way that the filtering applies.
Otherwise it would never triggered.
---
 tests/sdfg/state_test.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py
index 7a6894c8ac..efc11cc844 100644
--- a/tests/sdfg/state_test.py
+++ b/tests/sdfg/state_test.py
@@ -20,7 +20,9 @@ def test_read_write_set():
     state.add_memlet_path(rw_b, task2, dst_conn='B', memlet=dace.Memlet('B[2]'))
     state.add_memlet_path(task2, write_c, src_conn='C', memlet=dace.Memlet('C[2]'))
 
-    assert 'B' not in state.read_and_write_sets()[0]
+    read_set, write_set = state.read_and_write_sets()
+    assert {'B', 'A'} == read_set
+    assert {'C', 'B'} == write_set
 
 
 def test_read_write_set_y_formation():

From 570437b18ac3a780c0150fbe923babd61f354cb4 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Wed, 23 Oct 2024 09:40:45 +0200
Subject: [PATCH 13/17] Fixed the
 `state_test.py::test_read_write_set_y_formation` test.

Because of the removed filtering `B` is no longer removed from the read set.
However, because now the test has become quite useless.

We also see that the test was specifically constructed in such a way that the filtering applies.
Otherwise it would never triggered.
---
 tests/sdfg/state_test.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py
index efc11cc844..4bde3788e0 100644
--- a/tests/sdfg/state_test.py
+++ b/tests/sdfg/state_test.py
@@ -44,7 +44,9 @@ def test_read_write_set_y_formation():
     state.add_memlet_path(rw_b, task2, dst_conn='B', memlet=dace.Memlet(data='B', subset='0'))
     state.add_memlet_path(task2, write_c, src_conn='C', memlet=dace.Memlet(data='C', subset='0'))
 
-    assert 'B' not in state.read_and_write_sets()[0]
+    read_set, write_set = state.read_and_write_sets()
+    assert {'B', 'A'} == read_set
+    assert {'C', 'B'} == write_set
 
 
 def test_deepcopy_state():

From cb80f0b3288cedd7ea89acb035cae7873f7afe2f Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Wed, 23 Oct 2024 14:34:04 +0200
Subject: [PATCH 14/17] Fixed
 `move_loop_into_map_test.py::MoveLoopIntoMapTest::test_more_than_a_map`.

Because the filtering is now not applied, like it was originally, the transformation does no longer apply.
However, this is the historical expected behaviour.
---
 .../move_loop_into_map_test.py                | 24 ++++---------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/tests/transformations/move_loop_into_map_test.py b/tests/transformations/move_loop_into_map_test.py
index fbb05d30f5..ad51941cb0 100644
--- a/tests/transformations/move_loop_into_map_test.py
+++ b/tests/transformations/move_loop_into_map_test.py
@@ -150,7 +150,9 @@ def test_apply_multiple_times_1(self):
     def test_more_than_a_map(self):
         """
         `out` is read and written indirectly by the MapExit, potentially leading to a RW dependency.
-        However, there is no dependency.
+
+        Note that there is actually no dependency, however, the transformation, because it relies
+        on `SDFGState.read_and_write_sets()` it can not detect this and can thus not be applied.
         """
         sdfg = dace.SDFG('more_than_a_map')
         _, aarr = sdfg.add_array('A', (3, 3), dace.float64)
@@ -175,26 +177,8 @@ def test_more_than_a_map(self):
         body.add_nedge(twrite, owrite, dace.Memlet.from_array('out', oarr))
         sdfg.add_loop(None, body, None, '_', '0', '_ < 10', '_ + 1')
 
-        sdfg_args_ref = {
-            "A": np.array(np.random.rand(3, 3), dtype=np.float64),
-            "B": np.array(np.random.rand(3, 3), dtype=np.float64),
-            "out": np.array(np.random.rand(3, 3), dtype=np.float64),
-        }
-        sdfg_args_res = copy.deepcopy(sdfg_args_ref)
-
-        # Perform the reference execution
-        sdfg(**sdfg_args_ref)
-
-        # Apply the transformation and execute the SDFG again.
         count = sdfg.apply_transformations(MoveLoopIntoMap, validate_all=True, validate=True)
-        sdfg(**sdfg_args_res)
-
-        for name in sdfg_args_ref.keys():
-            self.assertTrue(
-                np.allclose(sdfg_args_ref[name], sdfg_args_res[name]),
-                f"Miss match for {name}",
-            )
-        self.assertTrue(count > 0)
+        self.assertTrue(count == 0)
 
     def test_more_than_a_map_1(self):
         """

From b704a4378cd8b03a1d1e5469458118c2fd70ce2d Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Wed, 23 Oct 2024 14:39:03 +0200
Subject: [PATCH 15/17] Fixed `prune_connectors_test.py::test_read_write_*`.

Because of the removed filtering the transformat can no longer apply.
However, I originally added these tests to demonstrate this inconsistent behaviour in the first place.
So removing them is now the right choice.

This commit also combines them to `prune_connectors_test.py::test_read_write`.
---
 .../transformations/prune_connectors_test.py  | 25 +++----------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/tests/transformations/prune_connectors_test.py b/tests/transformations/prune_connectors_test.py
index 9995fbd305..b7b287d77e 100644
--- a/tests/transformations/prune_connectors_test.py
+++ b/tests/transformations/prune_connectors_test.py
@@ -331,16 +331,6 @@ def test_unused_retval_2():
     assert np.allclose(a, 1)
 
 
-def test_read_write_1():
-    # Because the memlet is conforming, we can apply the transformation.
-    sdfg = _make_read_write_sdfg(True)
-
-    assert first_mode == PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=osdfg, expr_index=0, permissive=False)
-
-
-
-
-
 def test_prune_connectors_with_dependencies():
     sdfg = dace.SDFG('tester')
     A, A_desc = sdfg.add_array('A', [4], dace.float64)
@@ -419,21 +409,12 @@ def test_prune_connectors_with_dependencies():
     assert np.allclose(np_d, np_d_)
 
 
-def test_read_write_1():
+def test_read_write():
     sdfg, nsdfg = _make_read_write_sdfg(True)
+    assert not PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False)
 
-    assert PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False)
-    sdfg.apply_transformations_repeated(PruneConnectors, validate=True, validate_all=True)
-
-
-def test_read_write_2():
-    # In previous versions of DaCe the transformation could not be applied in the
-    #  case of a not conforming Memlet.
-    #  See [PR#1678](https://github.com/spcl/dace/pull/1678)
     sdfg, nsdfg = _make_read_write_sdfg(False)
-
-    assert PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False)
-    sdfg.apply_transformations_repeated(PruneConnectors, validate=True, validate_all=True)
+    assert not PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False)
 
 
 if __name__ == "__main__":

From f74d6e81f979483ce849606f5911c9ece3adec3f Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Wed, 23 Oct 2024 14:46:27 +0200
Subject: [PATCH 16/17] General improvements to some tests.

Ensured that the return value is always accassable in the same way.
Also ensured that the `test_rna_read_and_write_sets_different_storage()` test verifies that it still gives the same result.
---
 .../refine_nested_access_test.py               | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/tests/transformations/refine_nested_access_test.py b/tests/transformations/refine_nested_access_test.py
index 94c02a88de..38e17ad3af 100644
--- a/tests/transformations/refine_nested_access_test.py
+++ b/tests/transformations/refine_nested_access_test.py
@@ -177,12 +177,12 @@ def _make_nested_sdfg(diff_in_out: bool) -> dace.SDFG:
         sdfg.add_array("T1", dtype=dace.float64, shape=(2,), transient=False)
 
         A = state.add_access("A")
-        T1_input = state.add_access("T1")
+        T1_output = state.add_access("T1")
         if diff_in_out:
             sdfg.add_array("T2", dtype=dace.float64, shape=(2,), transient=False)
-            T1_output = state.add_access("T2")
+            T1_input = state.add_access("T2")
         else:
-            T1_output = state.add_access("T1")
+            T1_input = state.add_access("T1")
 
         tsklt = state.add_tasklet(
             "comp",
@@ -195,7 +195,7 @@ def _make_nested_sdfg(diff_in_out: bool) -> dace.SDFG:
         # An alternative would be to write to a different location here.
         #  Then, the data would be added to the access node.
         state.add_edge(A, None, T1_input, None, dace.Memlet("A[0] -> [0]"))
-        state.add_edge(T1_input, None, tsklt, "__in2", dace.Memlet("T1[0]"))
+        state.add_edge(T1_input, None, tsklt, "__in2", dace.Memlet(T1_input.data + "[0]"))
         state.add_edge(tsklt, "__out", T1_output, None, dace.Memlet(T1_output.data + "[1]"))
         return sdfg
 
@@ -254,6 +254,16 @@ def test_rna_read_and_write_sets_different_storage():
     )
     assert nb_applied > 0
 
+    args = {
+        "A": np.array(np.random.rand(2), dtype=np.float64, copy=True),
+        "T2": np.array(np.random.rand(2), dtype=np.float64, copy=True),
+        "T1": np.zeros(2, dtype=np.float64),
+    }
+    ref = args["A"][0] + args["A"][1]
+    sdfg(**args)
+    res = args["T1"][1]
+    assert np.allclose(res, ref), f"Expected '{ref}' but got '{res}'."
+
 
 if __name__ == '__main__':
     test_refine_dataflow()

From e1039243d847f964bdec5ced1b2cdebeb1c977c5 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Wed, 23 Oct 2024 14:50:14 +0200
Subject: [PATCH 17/17] Updated
 `refine_nested_access_test.py::test_rna_read_and_write_sets_doule_use`

I realized that the transformation should not apply.
The reason is because of all arguments to the nested SDFG element `0` is accessed.
This means it can not be adjusted.
---
 tests/transformations/refine_nested_access_test.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/tests/transformations/refine_nested_access_test.py b/tests/transformations/refine_nested_access_test.py
index 38e17ad3af..81640665ed 100644
--- a/tests/transformations/refine_nested_access_test.py
+++ b/tests/transformations/refine_nested_access_test.py
@@ -228,18 +228,15 @@ def _make_nested_sdfg(diff_in_out: bool) -> dace.SDFG:
 
 
 def test_rna_read_and_write_sets_doule_use():
-    """
-    NOTE: Under the current definition of the read/write sets this test will fail.
-    """
-
-    # The output is used also as temporary storage.
+    # The transformation does not apply because we access element `0` of both arrays that we
+    #  pass inside the nested SDFG.
     sdfg = _make_rna_read_and_write_set_sdfg(False)
     nb_applied = sdfg.apply_transformations_repeated(
         [RefineNestedAccess],
         validate=True,
         validate_all=True,
     )
-    assert nb_applied > 0
+    assert nb_applied == 0
 
 
 def test_rna_read_and_write_sets_different_storage():