From 8d2109776bbb3e5eaf2c42536450cf8e0a4f578d Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Fri, 9 Jun 2023 15:37:58 -0600
Subject: [PATCH 01/18] adds update_csv command and test

---
 src/dcqc/main.py       |  43 ++++++++++
 tests/data/input.csv   |   4 +
 tests/data/suites.json | 179 +++++++++++++++++++++++++++++++++++++++++
 tests/test_main.py     |  17 ++++
 4 files changed, 243 insertions(+)
 create mode 100644 tests/data/input.csv
 create mode 100644 tests/data/suites.json

diff --git a/src/dcqc/main.py b/src/dcqc/main.py
index a9da6b3..566e6be 100644
--- a/src/dcqc/main.py
+++ b/src/dcqc/main.py
@@ -204,3 +204,46 @@ def qc_file(
     report = JsonReport()
     suite_json = report.generate(suite)
     json.dump(suite_json, sys.stdout, indent=2)
+
+
+@app.command()
+def update_csv(
+    suites_file: Path = input_path_arg,
+    input_file: Path = input_path_arg,
+    output_file: Path = output_path_arg,
+):
+    """Update input CSV file with dcqc_status column"""
+    suites = JsonParser.parse_objects(suites_file, SuiteABC)
+    suite_dict = {}
+    for suite in suites:
+        url = suite.target.files[0].url
+        status = suite._status.value
+        if not suite_dict.get(url):
+            suite_dict[url] = [status]
+        else:
+            suite_dict[url].append(status)
+
+    collapsed_dict = {}
+    for url, statuses in suite_dict.items():
+        if "RED" in statuses:
+            collapsed_dict[url] = "RED"
+        elif "AMBER" in statuses:
+            collapsed_dict[url] = "AMBER"
+        elif "GREEN" in statuses:
+            collapsed_dict[url] = "GREEN"
+        else:
+            collapsed_dict[url] = "NONE"
+
+    parser = CsvParser(input_file)
+    row_list = []
+    for row in parser.list_rows():
+        row_list.append(row[1])
+    for row in row_list:
+        row["dcqc_status"] = collapsed_dict[row["url"]]
+
+    keys = row_list[0].keys()
+
+    with open(output_file, "w", newline="", encoding="utf-8") as output_file:
+        dict_writer = DictWriter(output_file, keys)
+        dict_writer.writeheader()
+        dict_writer.writerows(row_list)
diff --git a/tests/data/input.csv b/tests/data/input.csv
new file mode 100644
index 0000000..946fcf8
--- /dev/null
+++ b/tests/data/input.csv
@@ -0,0 +1,4 @@
+url,file_type,md5_checksum
+syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9
+syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff
+syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9
diff --git a/tests/data/suites.json b/tests/data/suites.json
new file mode 100644
index 0000000..107a5c6
--- /dev/null
+++ b/tests/data/suites.json
@@ -0,0 +1,179 @@
+[
+  {
+    "type": "TiffSuite",
+    "target": {
+      "id": "0001",
+      "files": [
+        {
+          "url": "syn://syn51585496",
+          "metadata": {
+            "md5_checksum": "c7b08f6decb5e7572efbe6074926a843"
+          },
+          "type": "TIFF",
+          "name": "circuit.tif",
+          "local_path": "/tmp/dcqc-staged-7onezxv1/circuit.tif"
+        }
+      ],
+      "type": "SingleTarget"
+    },
+    "suite_status": {
+      "required_tests": [
+        "Md5ChecksumTest",
+        "LibTiffInfoTest",
+        "FileExtensionTest"
+      ],
+      "skipped_tests": [],
+      "status": "GREEN"
+    },
+    "tests": [
+      {
+        "type": "FileExtensionTest",
+        "tier": 1,
+        "is_external_test": false,
+        "status": "passed"
+      },
+      {
+        "type": "GrepDateTest",
+        "tier": 4,
+        "is_external_test": true,
+        "status": "passed"
+      },
+      {
+        "type": "LibTiffInfoTest",
+        "tier": 2,
+        "is_external_test": true,
+        "status": "passed"
+      },
+      {
+        "type": "Md5ChecksumTest",
+        "tier": 1,
+        "is_external_test": false,
+        "status": "passed"
+      },
+      {
+        "type": "TiffTag306DateTimeTest",
+        "tier": 4,
+        "is_external_test": true,
+        "status": "passed"
+      }
+    ]
+  },
+  {
+    "type": "TiffSuite",
+    "target": {
+      "id": "0002",
+      "files": [
+        {
+          "url": "syn://syn51585494",
+          "metadata": {
+            "md5_checksum": "9cee1b0e8c4d051fabea82b62ae69404"
+          },
+          "type": "TIFF",
+          "name": "test_contains_word_date.tif",
+          "local_path": "/tmp/dcqc-staged-ddxo9fx2/test_contains_word_date.tif"
+        }
+      ],
+      "type": "SingleTarget"
+    },
+    "suite_status": {
+      "required_tests": [
+        "Md5ChecksumTest",
+        "LibTiffInfoTest",
+        "FileExtensionTest"
+      ],
+      "skipped_tests": [],
+      "status": "RED"
+    },
+    "tests": [
+      {
+        "type": "FileExtensionTest",
+        "tier": 1,
+        "is_external_test": false,
+        "status": "passed"
+      },
+      {
+        "type": "GrepDateTest",
+        "tier": 4,
+        "is_external_test": true,
+        "status": "failed"
+      },
+      {
+        "type": "LibTiffInfoTest",
+        "tier": 2,
+        "is_external_test": true,
+        "status": "failed"
+      },
+      {
+        "type": "Md5ChecksumTest",
+        "tier": 1,
+        "is_external_test": false,
+        "status": "passed"
+      },
+      {
+        "type": "TiffTag306DateTimeTest",
+        "tier": 4,
+        "is_external_test": true,
+        "status": "passed"
+      }
+    ]
+  },
+  {
+    "type": "TiffSuite",
+    "target": {
+      "id": "0003",
+      "files": [
+        {
+          "url": "syn://syn51585495",
+          "metadata": {
+            "md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268"
+          },
+          "type": "TIFF",
+          "name": "test_image_dirty_datetime.tif",
+          "local_path": "/tmp/dcqc-staged-5m6d8fdj/test_image_dirty_datetime.tif"
+        }
+      ],
+      "type": "SingleTarget"
+    },
+    "suite_status": {
+      "required_tests": [
+        "Md5ChecksumTest",
+        "LibTiffInfoTest",
+        "FileExtensionTest"
+      ],
+      "skipped_tests": [],
+      "status": "AMBER"
+    },
+    "tests": [
+      {
+        "type": "FileExtensionTest",
+        "tier": 1,
+        "is_external_test": false,
+        "status": "passed"
+      },
+      {
+        "type": "GrepDateTest",
+        "tier": 4,
+        "is_external_test": true,
+        "status": "passed"
+      },
+      {
+        "type": "LibTiffInfoTest",
+        "tier": 2,
+        "is_external_test": true,
+        "status": "passed"
+      },
+      {
+        "type": "Md5ChecksumTest",
+        "tier": 1,
+        "is_external_test": false,
+        "status": "passed"
+      },
+      {
+        "type": "TiffTag306DateTimeTest",
+        "tier": 4,
+        "is_external_test": true,
+        "status": "failed"
+      }
+    ]
+  }
+]
diff --git a/tests/test_main.py b/tests/test_main.py
index 04218b4..62e90fe 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -134,3 +134,20 @@ def test_qc_file(get_data):
     ]
     result = run_command(args)
     check_command_result(result)
+
+
+def test_update_csv(get_data, get_output):
+    suites_path = get_data("suites.json")
+    input_path = get_data("input.csv")
+    output_path = get_output("update_csv") / "output.csv"
+    output_path.unlink(missing_ok=True)
+
+    args = [
+        "update-csv",
+        suites_path,
+        input_path,
+        output_path,
+    ]
+    result = run_command(args)
+    check_command_result(result)
+    assert output_path.exists()

From c08e61612425a8d98753934ce543b1d14874d29c Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Mon, 12 Jun 2023 11:22:48 -0600
Subject: [PATCH 02/18] creates csv updater

---
 src/dcqc/main.py             | 36 +++----------------------
 src/dcqc/suites/suite_abc.py |  6 +++++
 src/dcqc/updaters.py         | 52 ++++++++++++++++++++++++++++++++++++
 tests/data/output.csv        |  4 +++
 4 files changed, 65 insertions(+), 33 deletions(-)
 create mode 100644 src/dcqc/updaters.py
 create mode 100644 tests/data/output.csv

diff --git a/src/dcqc/main.py b/src/dcqc/main.py
index 566e6be..72583a8 100644
--- a/src/dcqc/main.py
+++ b/src/dcqc/main.py
@@ -13,6 +13,7 @@
 from dcqc.suites.suite_abc import SuiteABC
 from dcqc.target import SingleTarget
 from dcqc.tests.base_test import BaseTest, ExternalTestMixin
+from dcqc.updaters import CsvUpdater
 
 # Make commands optional to allow for `dcqc --version`
 app = Typer(invoke_without_command=True)
@@ -214,36 +215,5 @@ def update_csv(
 ):
     """Update input CSV file with dcqc_status column"""
     suites = JsonParser.parse_objects(suites_file, SuiteABC)
-    suite_dict = {}
-    for suite in suites:
-        url = suite.target.files[0].url
-        status = suite._status.value
-        if not suite_dict.get(url):
-            suite_dict[url] = [status]
-        else:
-            suite_dict[url].append(status)
-
-    collapsed_dict = {}
-    for url, statuses in suite_dict.items():
-        if "RED" in statuses:
-            collapsed_dict[url] = "RED"
-        elif "AMBER" in statuses:
-            collapsed_dict[url] = "AMBER"
-        elif "GREEN" in statuses:
-            collapsed_dict[url] = "GREEN"
-        else:
-            collapsed_dict[url] = "NONE"
-
-    parser = CsvParser(input_file)
-    row_list = []
-    for row in parser.list_rows():
-        row_list.append(row[1])
-    for row in row_list:
-        row["dcqc_status"] = collapsed_dict[row["url"]]
-
-    keys = row_list[0].keys()
-
-    with open(output_file, "w", newline="", encoding="utf-8") as output_file:
-        dict_writer = DictWriter(output_file, keys)
-        dict_writer.writeheader()
-        dict_writer.writerows(row_list)
+    updater = CsvUpdater(input_file, output_file)
+    updater.update(suites)
diff --git a/src/dcqc/suites/suite_abc.py b/src/dcqc/suites/suite_abc.py
index ed6b51e..76c91ae 100644
--- a/src/dcqc/suites/suite_abc.py
+++ b/src/dcqc/suites/suite_abc.py
@@ -292,3 +292,9 @@ def from_dict(cls, dictionary: SerializedObject) -> SuiteABC:
     def get_base_class(cls):
         """Retrieve base class."""
         return SuiteABC
+
+    def get_status(self, compute_ok: bool = True) -> SuiteStatus:
+        """Compute (if applicable) and return the suite status."""
+        if self._status == SuiteStatus.NONE and compute_ok:
+            self._status = self.compute_status()
+        return self._status
diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py
new file mode 100644
index 0000000..3b73c06
--- /dev/null
+++ b/src/dcqc/updaters.py
@@ -0,0 +1,52 @@
+from csv import DictWriter
+from pathlib import Path
+from dcqc.parsers import CsvParser, JsonParser
+from dcqc.suites.suite_abc import SuiteABC
+from typing import List, Dict
+
+
+class CsvUpdater:
+    input_path: Path
+    output_path: Path
+    parser: CsvParser
+
+    def __init__(self, input_path: Path, output_path: Path):
+        self.input_path = input_path
+        self.output_path = output_path
+        self.parser = CsvParser(input_path)
+
+    def update(self, suites: List[SuiteABC]):
+        suite_dict: Dict[str, List[str]] = {}  # mypy made me do this, not sure why
+        for suite in suites:
+            url = suite.target.files[0].url
+            status = suite.get_status()
+            if not suite_dict.get(url):
+                suite_dict[url] = [status.value]
+            else:
+                suite_dict[url].append(status.value)
+
+        collapsed_dict = {}
+        for url, statuses in suite_dict.items():
+            if "RED" in statuses:
+                collapsed_dict[url] = "RED"
+            elif "AMBER" in statuses:
+                collapsed_dict[url] = "AMBER"
+            elif "GREEN" in statuses:
+                collapsed_dict[url] = "GREEN"
+            else:
+                collapsed_dict[url] = "NONE"
+
+        row_list = []
+        for row in self.parser.list_rows():
+            csv_data = row[1]
+            csv_data["dcqc_status"] = collapsed_dict[csv_data["url"]]
+            row_list.append(csv_data)
+
+        keys = row_list[0].keys()
+
+        with open(
+            str(self.output_path), "w", newline="", encoding="utf-8"
+        ) as output_file:
+            dict_writer = DictWriter(output_file, keys)
+            dict_writer.writeheader()
+            dict_writer.writerows(row_list)
diff --git a/tests/data/output.csv b/tests/data/output.csv
new file mode 100644
index 0000000..ab1167f
--- /dev/null
+++ b/tests/data/output.csv
@@ -0,0 +1,4 @@
+url,file_type,md5_checksum,dcqc_status
+syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9,GREEN
+syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff,RED
+syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9,AMBER

From 4a0f4010db8746a71dd79cac8f1bba6e906b003f Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Mon, 12 Jun 2023 14:30:35 -0600
Subject: [PATCH 03/18] adds CsvUpdater test

---
 src/dcqc/updaters.py   | 17 ++++++++++-------
 tests/test_updaters.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 7 deletions(-)
 create mode 100644 tests/test_updaters.py

diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py
index 3b73c06..3986912 100644
--- a/src/dcqc/updaters.py
+++ b/src/dcqc/updaters.py
@@ -1,8 +1,9 @@
 from csv import DictWriter
 from pathlib import Path
-from dcqc.parsers import CsvParser, JsonParser
+from typing import Dict, List
+
+from dcqc.parsers import CsvParser
 from dcqc.suites.suite_abc import SuiteABC
-from typing import List, Dict
 
 
 class CsvUpdater:
@@ -11,12 +12,14 @@ class CsvUpdater:
     parser: CsvParser
 
     def __init__(self, input_path: Path, output_path: Path):
-        self.input_path = input_path
         self.output_path = output_path
         self.parser = CsvParser(input_path)
 
     def update(self, suites: List[SuiteABC]):
-        suite_dict: Dict[str, List[str]] = {}  # mypy made me do this, not sure why
+        suite_dict: Dict[
+            str, List[str]
+        ] = {}  # mypy made me do this, but only here. not sure why
+        # {url: [list_of_statuses]} data structure to allow for multi-file targets
         for suite in suites:
             url = suite.target.files[0].url
             status = suite.get_status()
@@ -24,7 +27,7 @@ def update(self, suites: List[SuiteABC]):
                 suite_dict[url] = [status.value]
             else:
                 suite_dict[url].append(status.value)
-
+        # Evaluate dcqc_status for each url
         collapsed_dict = {}
         for url, statuses in suite_dict.items():
             if "RED" in statuses:
@@ -35,7 +38,7 @@ def update(self, suites: List[SuiteABC]):
                 collapsed_dict[url] = "GREEN"
             else:
                 collapsed_dict[url] = "NONE"
-
+        # Create CSV data structure
         row_list = []
         for row in self.parser.list_rows():
             csv_data = row[1]
@@ -43,7 +46,7 @@ def update(self, suites: List[SuiteABC]):
             row_list.append(csv_data)
 
         keys = row_list[0].keys()
-
+        # Export updated CSV
         with open(
             str(self.output_path), "w", newline="", encoding="utf-8"
         ) as output_file:
diff --git a/tests/test_updaters.py b/tests/test_updaters.py
new file mode 100644
index 0000000..109ac65
--- /dev/null
+++ b/tests/test_updaters.py
@@ -0,0 +1,37 @@
+import csv
+
+from unittest.mock import MagicMock
+
+from dcqc.updaters import CsvUpdater
+from pathlib import Path
+from dcqc.suites.suite_abc import SuiteABC
+
+
+class TestCsvUpdater:
+    mock_dict = {
+        "syn://syn51585496": "GREEN",
+        "syn://syn51585494": "RED",
+        "syn://syn51585495": "AMBER",
+    }
+
+    def __init__(self):
+        self.mocked_suites = []
+        for url, status in self.mock_dict.items():
+            suite = MagicMock(SuiteABC)
+            suite.target.files[0].url = url
+            suite.get_status.return_value = status
+            self.mocked_suites.append(suite)
+
+    def get_dcqc_status_list_from_file(self, filename):
+        with open(filename, "r") as file:
+            reader = csv.DictReader(file)
+            status_list = [row["dcqc_status"] for row in reader]
+        return status_list
+
+    def test_that_csv_updater_updates_csv_as_expected(self, get_data):
+        input_file = get_data("input.csv")
+        output_file = get_data("output.csv")
+        updater = CsvUpdater(input_file, output_file)
+        updater.update(self.mocked_suites)
+        status_list = self.get_dcqc_status_list_from_file(output_file)
+        assert status_list == ["GREEN", "RED", "AMBER"]

From 59d9825cebc70e33525f3c421e890466fd1540c0 Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Mon, 12 Jun 2023 14:31:05 -0600
Subject: [PATCH 04/18] removes not needed deps

---
 tests/test_updaters.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_updaters.py b/tests/test_updaters.py
index 109ac65..5a4af65 100644
--- a/tests/test_updaters.py
+++ b/tests/test_updaters.py
@@ -3,7 +3,6 @@
 from unittest.mock import MagicMock
 
 from dcqc.updaters import CsvUpdater
-from pathlib import Path
 from dcqc.suites.suite_abc import SuiteABC
 
 

From 1e93df9a71df2db1bbf627cbe3a91afab9d9a74c Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Mon, 12 Jun 2023 14:32:57 -0600
Subject: [PATCH 05/18] update after pre-commit

---
 tests/test_updaters.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_updaters.py b/tests/test_updaters.py
index 5a4af65..2ba24a8 100644
--- a/tests/test_updaters.py
+++ b/tests/test_updaters.py
@@ -1,9 +1,8 @@
 import csv
-
 from unittest.mock import MagicMock
 
-from dcqc.updaters import CsvUpdater
 from dcqc.suites.suite_abc import SuiteABC
+from dcqc.updaters import CsvUpdater
 
 
 class TestCsvUpdater:

From 7b433d3439e9de6663a19a3103440dc08abd6e4e Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Mon, 12 Jun 2023 15:47:12 -0600
Subject: [PATCH 06/18] adds coverage for test_suites

---
 tests/test_suites.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/test_suites.py b/tests/test_suites.py
index 46b4a59..a086daf 100644
--- a/tests/test_suites.py
+++ b/tests/test_suites.py
@@ -1,5 +1,7 @@
 import pytest
 
+from unittest.mock import patch
+
 from dcqc.file import FileType
 from dcqc.suites.suite_abc import SuiteABC, SuiteStatus
 from dcqc.suites.suites import FileSuite, OmeTiffSuite, TiffSuite
@@ -124,3 +126,16 @@ def test_that_a_suite_will_consider_required_tests_when_passing(test_targets):
     suite = SuiteABC.from_target(target, required_tests)
     suite_status = suite.compute_status()
     assert suite_status == SuiteStatus.GREEN
+
+
+def test_that_status_is_computed_if_not_already_assigned(test_targets):
+    with patch.object(
+        SuiteABC, "compute_status", return_value=SuiteStatus.GREEN
+    ) as patch_compute_status:
+        target = test_targets["good"]
+        required_tests = ["Md5ChecksumTest"]
+        suite = SuiteABC.from_target(target, required_tests)
+        suite._status = SuiteStatus.NONE
+        suite_status = suite.get_status()
+        assert suite_status == SuiteStatus.GREEN
+        patch_compute_status.assert_called_once()

From 664882dc956884ee3bce4aed170e30c42f9e5287 Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Mon, 12 Jun 2023 16:09:58 -0600
Subject: [PATCH 07/18] updates test_updaters

---
 tests/test_suites.py   |  4 ++--
 tests/test_updaters.py | 44 ++++++++++++++++++++++++++++--------------
 2 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/tests/test_suites.py b/tests/test_suites.py
index a086daf..9f96a5c 100644
--- a/tests/test_suites.py
+++ b/tests/test_suites.py
@@ -1,7 +1,7 @@
-import pytest
-
 from unittest.mock import patch
 
+import pytest
+
 from dcqc.file import FileType
 from dcqc.suites.suite_abc import SuiteABC, SuiteStatus
 from dcqc.suites.suites import FileSuite, OmeTiffSuite, TiffSuite
diff --git a/tests/test_updaters.py b/tests/test_updaters.py
index 2ba24a8..e268069 100644
--- a/tests/test_updaters.py
+++ b/tests/test_updaters.py
@@ -5,20 +5,34 @@
 from dcqc.updaters import CsvUpdater
 
 
+def generate_mocked_suites(mock_dict):
+    mocked_suites = []
+    for url, status in mock_dict.items():
+        suite = MagicMock(SuiteABC)
+        suite.target.files[0].url = url
+        suite.get_status.return_value = status
+        mocked_suites.append(suite)
+        return mocked_suites
+
+
 class TestCsvUpdater:
-    mock_dict = {
+    mock_dict_single = {
         "syn://syn51585496": "GREEN",
         "syn://syn51585494": "RED",
         "syn://syn51585495": "AMBER",
     }
+    mock_dict_multi = {
+        "syn://syn51585496": "GREEN",
+        "syn://syn51585494": "GREEN",
+        "syn://syn51585495": "GREEN",
+    }
 
-    def __init__(self):
-        self.mocked_suites = []
-        for url, status in self.mock_dict.items():
-            suite = MagicMock(SuiteABC)
-            suite.target.files[0].url = url
-            suite.get_status.return_value = status
-            self.mocked_suites.append(suite)
+    def __init__(self, get_data):
+        self.mocked_suites_single = generate_mocked_suites(self.mock_dict_single)
+        self.mocked_suites_multi = generate_mocked_suites(self.mock_dict_multi)
+        self.input_file = get_data("input.csv")
+        self.output_file = get_data("output.csv")
+        self.updater = CsvUpdater(self.input_file, self.output_file)
 
     def get_dcqc_status_list_from_file(self, filename):
         with open(filename, "r") as file:
@@ -26,10 +40,12 @@ def get_dcqc_status_list_from_file(self, filename):
             status_list = [row["dcqc_status"] for row in reader]
         return status_list
 
-    def test_that_csv_updater_updates_csv_as_expected(self, get_data):
-        input_file = get_data("input.csv")
-        output_file = get_data("output.csv")
-        updater = CsvUpdater(input_file, output_file)
-        updater.update(self.mocked_suites)
-        status_list = self.get_dcqc_status_list_from_file(output_file)
+    def test_that_csv_updater_updates_csv_as_expected_with_single_targets(self):
+        self.updater.update(self.mocked_suites_single)
+        status_list = self.get_dcqc_status_list_from_file(self.output_file)
+        assert status_list == ["GREEN", "RED", "AMBER"]
+
+    def test_that_csv_updater_updates_csv_as_expected_with_multi_targets(self):
+        self.updater.update(self.mocked_suites_multi)
+        status_list = self.get_dcqc_status_list_from_file(self.output_file)
         assert status_list == ["GREEN", "RED", "AMBER"]

From 189d2f4e2e2861ac23e7e3fff0c33cc4c8bf0441 Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Mon, 12 Jun 2023 16:54:27 -0600
Subject: [PATCH 08/18] updates updater to handle directory

---
 src/dcqc/updaters.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py
index 3986912..85f579f 100644
--- a/src/dcqc/updaters.py
+++ b/src/dcqc/updaters.py
@@ -47,8 +47,9 @@ def update(self, suites: List[SuiteABC]):
 
         keys = row_list[0].keys()
         # Export updated CSV
+        self.output_path.parent.mkdir(parents=True, exist_ok=True)
         with open(
-            str(self.output_path), "w", newline="", encoding="utf-8"
+            str(self.output_path), "w+", newline="", encoding="utf-8"
         ) as output_file:
             dict_writer = DictWriter(output_file, keys)
             dict_writer.writeheader()

From 95e09b172562fee7de7742cf86d8fe269aff08de Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Tue, 13 Jun 2023 09:19:14 -0600
Subject: [PATCH 09/18] remove compute_ok

---
 src/dcqc/suites/suite_abc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dcqc/suites/suite_abc.py b/src/dcqc/suites/suite_abc.py
index 76c91ae..38e08a4 100644
--- a/src/dcqc/suites/suite_abc.py
+++ b/src/dcqc/suites/suite_abc.py
@@ -293,8 +293,8 @@ def get_base_class(cls):
         """Retrieve base class."""
         return SuiteABC
 
-    def get_status(self, compute_ok: bool = True) -> SuiteStatus:
+    def get_status(self) -> SuiteStatus:
         """Compute (if applicable) and return the suite status."""
-        if self._status == SuiteStatus.NONE and compute_ok:
+        if self._status == SuiteStatus.NONE:
             self._status = self.compute_status()
         return self._status

From 0d0f788407427ebb1a72e201ddcd8fda8ebd995e Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Tue, 13 Jun 2023 10:02:58 -0600
Subject: [PATCH 10/18] implement defaultdict

---
 src/dcqc/updaters.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py
index 85f579f..b487916 100644
--- a/src/dcqc/updaters.py
+++ b/src/dcqc/updaters.py
@@ -1,11 +1,14 @@
+from collections import defaultdict
 from csv import DictWriter
+from dataclasses import dataclass
 from pathlib import Path
-from typing import Dict, List
+from typing import List
 
 from dcqc.parsers import CsvParser
 from dcqc.suites.suite_abc import SuiteABC
 
 
+@dataclass
 class CsvUpdater:
     input_path: Path
     output_path: Path
@@ -13,20 +16,15 @@ class CsvUpdater:
 
     def __init__(self, input_path: Path, output_path: Path):
         self.output_path = output_path
-        self.parser = CsvParser(input_path)
+        self.input_path = input_path
 
     def update(self, suites: List[SuiteABC]):
-        suite_dict: Dict[
-            str, List[str]
-        ] = {}  # mypy made me do this, but only here. not sure why
+        suite_dict = defaultdict(list)
         # {url: [list_of_statuses]} data structure to allow for multi-file targets
         for suite in suites:
             url = suite.target.files[0].url
             status = suite.get_status()
-            if not suite_dict.get(url):
-                suite_dict[url] = [status.value]
-            else:
-                suite_dict[url].append(status.value)
+            suite_dict[url].append(status.value)
         # Evaluate dcqc_status for each url
         collapsed_dict = {}
         for url, statuses in suite_dict.items():
@@ -40,7 +38,8 @@ def update(self, suites: List[SuiteABC]):
                 collapsed_dict[url] = "NONE"
         # Create CSV data structure
         row_list = []
-        for row in self.parser.list_rows():
+        parser = CsvParser(self.input_path)
+        for row in parser.list_rows():
             csv_data = row[1]
             csv_data["dcqc_status"] = collapsed_dict[csv_data["url"]]
             row_list.append(csv_data)

From d35b87e181aea930684a875b17746e73a8c01611 Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Tue, 13 Jun 2023 10:07:00 -0600
Subject: [PATCH 11/18] Merge branch 'main' into bwmac/orca-175/add_csv_updater

---
 src/dcqc/suites/suite_abc.py               |   3 +-
 src/dcqc/suites/suites.py                  |   1 +
 src/dcqc/target.py                         |  26 +++++----
 src/dcqc/tests/__init__.py                 |   1 +
 src/dcqc/tests/paired_fastq_parity_test.py |  59 +++++++++++++++++++++
 tests/conftest.py                          |   5 ++
 tests/data/fastq1.fastq                    |   8 +++
 tests/data/fastq2.fastq.gz                 | Bin 0 -> 58 bytes
 tests/data/generate.py                     |   1 +
 tests/data/suite.json                      |   2 +-
 tests/data/target.json                     |   2 +-
 tests/data/test.computed.json              |   2 +-
 tests/data/test.external.json              |   2 +-
 tests/data/test.internal.json              |   2 +-
 tests/data/tests.json                      |   6 +--
 tests/test_target.py                       |  22 +++++++-
 tests/test_tests.py                        |  33 +++++++++++-
 17 files changed, 154 insertions(+), 21 deletions(-)
 create mode 100644 src/dcqc/tests/paired_fastq_parity_test.py
 create mode 100644 tests/data/fastq1.fastq
 create mode 100644 tests/data/fastq2.fastq.gz

diff --git a/src/dcqc/suites/suite_abc.py b/src/dcqc/suites/suite_abc.py
index 38e08a4..36a715d 100644
--- a/src/dcqc/suites/suite_abc.py
+++ b/src/dcqc/suites/suite_abc.py
@@ -228,7 +228,7 @@ def compute_status(self) -> SuiteStatus:
             if test_name in self.required_tests:
                 if test_status == TestStatus.FAIL:
                     self._status = SuiteStatus.RED
-                    break
+                    return self._status
             else:
                 if test_status == TestStatus.FAIL:
                     self._status = SuiteStatus.AMBER
@@ -236,7 +236,6 @@ def compute_status(self) -> SuiteStatus:
 
     def to_dict(self) -> SerializedObject:
         suite_status = self.compute_status()
-        print(suite_status)
         test_dicts = []
         for test in self.tests:
             test_dict = test.to_dict()
diff --git a/src/dcqc/suites/suites.py b/src/dcqc/suites/suites.py
index 33535e5..9227984 100644
--- a/src/dcqc/suites/suites.py
+++ b/src/dcqc/suites/suites.py
@@ -44,3 +44,4 @@ class BAMSuite(FileSuite):
 
 class FastqSuite(FileSuite):
     file_type = FileType.get_file_type("FASTQ")
+    add_tests = (tests.PairedFastqParityTest,)
diff --git a/src/dcqc/target.py b/src/dcqc/target.py
index 4de370f..64ae1b7 100644
--- a/src/dcqc/target.py
+++ b/src/dcqc/target.py
@@ -105,16 +105,9 @@ def __post_init__(self, file_or_files: File | list[File]):
     # switching to @pydantic.dataclasses.dataclass:
     # test_that_paths_are_unchanged_when_not_using_serialize_paths_relative_to
     def ensure_single_file(self):
-        """Ensure that target is only initialized with a single file.
-
-        Args:
-            value: List of files.
-
-        Returns:
-            List of files.
-        """
+        """Ensure that target is only initialized with a single file."""
         if len(self.files) != 1:
-            raise ValueError("Target is restricted to single files")
+            raise ValueError("SingleTarget is restricted to single files")
 
     @property
     def file(self):
@@ -128,3 +121,18 @@ def get_file_type(self) -> FileType:
             The file type object.
         """
         return self.file.get_file_type()
+
+
+@dataclass(init=False)
+class PairedTarget(BaseTarget):
+    """Paired (two-file) target."""
+
+    def __post_init__(self, file_or_files: File | list[File]):
+        """Run validation checks after initialization."""
+        super().__post_init__(file_or_files)
+        self.ensure_two_files()
+
+    def ensure_two_files(self):
+        """Ensure that target is only initialized with two files."""
+        if len(self.files) != 2:
+            raise ValueError("PairedTarget is restricted to two files")
diff --git a/src/dcqc/tests/__init__.py b/src/dcqc/tests/__init__.py
index f38bcb0..dd5e220 100644
--- a/src/dcqc/tests/__init__.py
+++ b/src/dcqc/tests/__init__.py
@@ -7,4 +7,5 @@
 from dcqc.tests.libtiff_info_test import LibTiffInfoTest
 from dcqc.tests.md5_checksum_test import Md5ChecksumTest
 from dcqc.tests.ome_xml_schema_test import OmeXmlSchemaTest
+from dcqc.tests.paired_fastq_parity_test import PairedFastqParityTest
 from dcqc.tests.tiff_tag_306_date_time_test import TiffTag306DateTimeTest
diff --git a/src/dcqc/tests/paired_fastq_parity_test.py b/src/dcqc/tests/paired_fastq_parity_test.py
new file mode 100644
index 0000000..3e9f85e
--- /dev/null
+++ b/src/dcqc/tests/paired_fastq_parity_test.py
@@ -0,0 +1,59 @@
+import gzip
+from pathlib import Path
+from typing import TextIO
+
+from dcqc.target import PairedTarget
+from dcqc.tests.base_test import InternalBaseTest, TestStatus
+
+
+class PairedFastqParityTest(InternalBaseTest):
+    tier = 2
+    target: PairedTarget
+
+    def compute_status(self) -> TestStatus:
+        """Compute test status."""
+        counts = list()
+        for file in self.target.files:
+            path = file.stage()
+            try:
+                count = self._count_fastq_lines(path)
+            except Exception:
+                return TestStatus.FAIL
+            counts.append(count)
+
+        # Check that there counts are all the same (i.e., equal)
+        if len(set(counts)) <= 1:
+            status = TestStatus.PASS
+        else:
+            status = TestStatus.FAIL
+        return status
+
+    def _count_fastq_lines(self, path: Path) -> int:
+        """Count the number of lines in a FASTQ file.
+
+        Args:
+            path: Path to the FASTQ file.
+
+        Returns:
+            Number of lines in the given FASTQ file.
+        """
+        # Source: https://stackoverflow.com/a/1019572/21077945
+        with self._open_fastq(path) as fastq:
+            num_lines = sum(1 for _ in fastq)
+        return num_lines
+
+    def _open_fastq(self, path: Path) -> TextIO:
+        """Open a FASTQ file regardless of compression.
+
+        Args:
+            path: Path to the FASTQ file.
+
+        Returns:
+            Opened FASTQ file (in text mode).
+        """
+        # TODO: This logic should ideally live in the File class, and a
+        #       test should confirm the integrity of compressed files
+        if path.name.endswith(".gz"):
+            return gzip.open(path, "rt")
+        else:
+            return path.open("rt")
diff --git a/tests/conftest.py b/tests/conftest.py
index d4336c7..6a19a94 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -56,6 +56,8 @@ def test_files(get_data):
     txt_path = get_data("test.txt")
     jsonld_path = get_data("example.jsonld")
     tiff_path = get_data("circuit.tif")
+    fastq1_path = get_data("fastq1.fastq")
+    fastq2_path = get_data("fastq2.fastq.gz")
     syn_path = "syn://syn50555279"
     tiff_dirty_datetime_path = get_data("test_image_dirty_datetime.tif")
     good_metadata = {
@@ -74,6 +76,7 @@ def test_files(get_data):
         "file_type": "tiff",
         "md5_checksum": "c7b08f6decb5e7572efbe6074926a843",
     }
+    fastq_metadata = {"file_type": "fastq"}
     tiff_dirty_datetime_metadata = {
         "file_type": "tiff",
         "md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268",
@@ -82,6 +85,8 @@ def test_files(get_data):
         "good": File(txt_path.as_posix(), good_metadata),
         "bad": File(txt_path.as_posix(), bad_metadata),
         "tiff": File(tiff_path.as_posix(), tiff_metadata),
+        "fastq1": File(fastq1_path.as_posix(), fastq_metadata),
+        "fastq2": File(fastq2_path.as_posix(), fastq_metadata),
         "jsonld": File(jsonld_path.as_posix(), jsonld_metadata),
         "synapse": File(syn_path, good_metadata),
         "tiff_dirty_datetime": File(
diff --git a/tests/data/fastq1.fastq b/tests/data/fastq1.fastq
new file mode 100644
index 0000000..535d2b0
--- /dev/null
+++ b/tests/data/fastq1.fastq
@@ -0,0 +1,8 @@
+1
+2
+3
+4
+5
+6
+7
+8
diff --git a/tests/data/fastq2.fastq.gz b/tests/data/fastq2.fastq.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d0b52ac8bfa701248b39cd1037605450a9e47f35
GIT binary patch
literal 58
zcmb2|=HLk0(~!czoR(NzQfQ<HrWjZc9%N(?;9=QNzpT)8<Nu^d8kaY1eOzUv5@dIn
Lv*o%p0|Ns9QvDSS

literal 0
HcmV?d00001

diff --git a/tests/data/generate.py b/tests/data/generate.py
index 860268b..1ab489f 100755
--- a/tests/data/generate.py
+++ b/tests/data/generate.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# TODO move this to pytest fixtures
 
 """Generate test files using latest version of dcqc."""
 
diff --git a/tests/data/suite.json b/tests/data/suite.json
index 36df3e3..3f6236d 100644
--- a/tests/data/suite.json
+++ b/tests/data/suite.json
@@ -1,6 +1,7 @@
 {
   "type": "TiffSuite",
   "target": {
+    "id": "001",
     "files": [
       {
         "url": "tests/data/test.txt",
@@ -12,7 +13,6 @@
         "local_path": "tests/data/test.txt"
       }
     ],
-    "id": "001",
     "type": "SingleTarget"
   },
   "suite_status": {
diff --git a/tests/data/target.json b/tests/data/target.json
index 4315039..372396d 100644
--- a/tests/data/target.json
+++ b/tests/data/target.json
@@ -1,4 +1,5 @@
 {
+  "id": "001",
   "files": [
     {
       "url": "tests/data/test.txt",
@@ -10,6 +11,5 @@
       "local_path": "tests/data/test.txt"
     }
   ],
-  "id": "001",
   "type": "SingleTarget"
 }
diff --git a/tests/data/test.computed.json b/tests/data/test.computed.json
index 9a802e5..b1b936a 100644
--- a/tests/data/test.computed.json
+++ b/tests/data/test.computed.json
@@ -4,6 +4,7 @@
   "is_external_test": false,
   "status": "passed",
   "target": {
+    "id": "001",
     "files": [
       {
         "url": "tests/data/test.txt",
@@ -15,7 +16,6 @@
         "local_path": "tests/data/test.txt"
       }
     ],
-    "id": "001",
     "type": "SingleTarget"
   }
 }
diff --git a/tests/data/test.external.json b/tests/data/test.external.json
index 2c40542..0060c81 100644
--- a/tests/data/test.external.json
+++ b/tests/data/test.external.json
@@ -4,6 +4,7 @@
   "is_external_test": true,
   "status": "pending",
   "target": {
+    "id": "001",
     "files": [
       {
         "url": "tests/data/test.txt",
@@ -15,7 +16,6 @@
         "local_path": "tests/data/test.txt"
       }
     ],
-    "id": "001",
     "type": "SingleTarget"
   }
 }
diff --git a/tests/data/test.internal.json b/tests/data/test.internal.json
index f64efc8..55d91be 100644
--- a/tests/data/test.internal.json
+++ b/tests/data/test.internal.json
@@ -4,6 +4,7 @@
   "is_external_test": false,
   "status": "pending",
   "target": {
+    "id": "001",
     "files": [
       {
         "url": "tests/data/test.txt",
@@ -15,7 +16,6 @@
         "local_path": "tests/data/test.txt"
       }
     ],
-    "id": "001",
     "type": "SingleTarget"
   }
 }
diff --git a/tests/data/tests.json b/tests/data/tests.json
index e4678da..21d89de 100644
--- a/tests/data/tests.json
+++ b/tests/data/tests.json
@@ -5,6 +5,7 @@
     "is_external_test": false,
     "status": "pending",
     "target": {
+      "id": "001",
       "files": [
         {
           "url": "tests/data/test.txt",
@@ -16,7 +17,6 @@
           "local_path": "tests/data/test.txt"
         }
       ],
-      "id": "001",
       "type": "SingleTarget"
     }
   },
@@ -26,6 +26,7 @@
     "is_external_test": true,
     "status": "pending",
     "target": {
+      "id": "001",
       "files": [
         {
           "url": "tests/data/test.txt",
@@ -37,7 +38,6 @@
           "local_path": "tests/data/test.txt"
         }
       ],
-      "id": "001",
       "type": "SingleTarget"
     }
   },
@@ -47,6 +47,7 @@
     "is_external_test": false,
     "status": "passed",
     "target": {
+      "id": "001",
       "files": [
         {
           "url": "tests/data/test.txt",
@@ -58,7 +59,6 @@
           "local_path": "tests/data/test.txt"
         }
       ],
-      "id": "001",
       "type": "SingleTarget"
     }
   }
diff --git a/tests/test_target.py b/tests/test_target.py
index 43ecfbf..e6140d7 100644
--- a/tests/test_target.py
+++ b/tests/test_target.py
@@ -1,6 +1,6 @@
 import pytest
 
-from dcqc.target import SingleTarget
+from dcqc.target import PairedTarget, SingleTarget
 
 
 def test_that_a_target_can_be_saved_and_restored_without_changing(test_files):
@@ -26,3 +26,23 @@ def test_for_an_error_when_creating_single_file_target_with_two_files(test_files
     test_file = test_files["good"]
     with pytest.raises(ValueError):
         SingleTarget([test_file, test_file])
+
+
+def test_that_paired_file_target_can_be_created_with_two_files(test_files):
+    test_file = test_files["good"]
+    target = PairedTarget([test_file, test_file])
+    assert len(target.files) == 2
+
+
+def test_for_an_error_when_creating_paired_file_target_with_one_file(test_files):
+    test_file = test_files["good"]
+    with pytest.raises(ValueError):
+        PairedTarget(test_file)
+
+
+def test_for_an_error_when_creating_paired_file_target_with_one_file_in_list(
+    test_files,
+):
+    test_file = test_files["good"]
+    with pytest.raises(ValueError):
+        PairedTarget([test_file])
diff --git a/tests/test_tests.py b/tests/test_tests.py
index bebc9e2..83692ce 100644
--- a/tests/test_tests.py
+++ b/tests/test_tests.py
@@ -4,7 +4,7 @@
 import pytest
 
 from dcqc import tests
-from dcqc.target import SingleTarget
+from dcqc.target import PairedTarget, SingleTarget
 from dcqc.tests import BaseTest, ExternalTestMixin, Process, TestStatus
 
 
@@ -240,3 +240,34 @@ def test_that_the_tifftag306datetimetest_correctly_interprets_exit_code_0_and_1(
         mocker.patch.object(test, "_find_process_outputs", return_value=bad_outputs)
         test_status = test.get_status()
         assert test_status == TestStatus.FAIL
+
+
+def test_that_paired_fastq_parity_test_correctly_passes_identical_fastq_files(
+    test_files,
+):
+    fastq1 = test_files["fastq1"]
+    target = PairedTarget([fastq1, fastq1])
+    test = tests.PairedFastqParityTest(target)
+    test_status = test.get_status()
+    assert test_status == TestStatus.PASS
+
+
+def test_that_paired_fastq_parity_test_correctly_fails_different_fastq_files(
+    test_files,
+):
+    fastq1 = test_files["fastq1"]
+    fastq2 = test_files["fastq2"]
+    target = PairedTarget([fastq1, fastq2])
+    test = tests.PairedFastqParityTest(target)
+    test_status = test.get_status()
+    assert test_status == TestStatus.FAIL
+
+
+def test_that_paired_fastq_parity_test_correctly_handles_compressed_fastq_files(
+    test_files,
+):
+    fastq2 = test_files["fastq2"]
+    target = PairedTarget([fastq2, fastq2])
+    test = tests.PairedFastqParityTest(target)
+    test_status = test.get_status()
+    assert test_status == TestStatus.PASS

From 66111ba114e2e3f1d7ee32c17b80f09843cd6b05 Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Tue, 13 Jun 2023 10:20:33 -0600
Subject: [PATCH 12/18] updates generate.py to include suites.json

---
 tests/data/generate.py                | 13 ++++++
 tests/data/suites.json                | 12 +++---
 tests/data/suites_files/suites_1.json | 59 +++++++++++++++++++++++++++
 tests/data/suites_files/suites_2.json | 59 +++++++++++++++++++++++++++
 tests/data/suites_files/suites_3.json | 59 +++++++++++++++++++++++++++
 5 files changed, 196 insertions(+), 6 deletions(-)
 create mode 100644 tests/data/suites_files/suites_1.json
 create mode 100644 tests/data/suites_files/suites_2.json
 create mode 100644 tests/data/suites_files/suites_3.json

diff --git a/tests/data/generate.py b/tests/data/generate.py
index 1ab489f..4e4ef3c 100755
--- a/tests/data/generate.py
+++ b/tests/data/generate.py
@@ -11,6 +11,7 @@
 from dcqc import tests
 from dcqc.file import File
 from dcqc.mixins import SerializableMixin
+from dcqc.parsers import JsonParser
 from dcqc.reports import JsonReport
 from dcqc.suites.suite_abc import SuiteABC
 from dcqc.target import SingleTarget
@@ -60,3 +61,15 @@ def export(obj: SerializableMixin | Sequence[SerializableMixin], filename: str):
 skipped_tests = ["LibTiffInfoTest"]
 suite = SuiteABC.from_tests(suite_tests, required_tests, skipped_tests)
 export(suite, "suite.json")
+
+# suites.json
+input_jsons = [
+    Path(file_path)
+    for file_path in [
+        "tests/data/suites_files/suites_1.json",
+        "tests/data/suites_files/suites_2.json",
+        "tests/data/suites_files/suites_3.json",
+    ]
+]
+suites = [JsonParser.parse_object(json_, SuiteABC) for json_ in input_jsons]
+export(suites, "suites.json")
diff --git a/tests/data/suites.json b/tests/data/suites.json
index 107a5c6..aaf72e4 100644
--- a/tests/data/suites.json
+++ b/tests/data/suites.json
@@ -19,8 +19,8 @@
     "suite_status": {
       "required_tests": [
         "Md5ChecksumTest",
-        "LibTiffInfoTest",
-        "FileExtensionTest"
+        "FileExtensionTest",
+        "LibTiffInfoTest"
       ],
       "skipped_tests": [],
       "status": "GREEN"
@@ -78,8 +78,8 @@
     "suite_status": {
       "required_tests": [
         "Md5ChecksumTest",
-        "LibTiffInfoTest",
-        "FileExtensionTest"
+        "FileExtensionTest",
+        "LibTiffInfoTest"
       ],
       "skipped_tests": [],
       "status": "RED"
@@ -137,8 +137,8 @@
     "suite_status": {
       "required_tests": [
         "Md5ChecksumTest",
-        "LibTiffInfoTest",
-        "FileExtensionTest"
+        "FileExtensionTest",
+        "LibTiffInfoTest"
       ],
       "skipped_tests": [],
       "status": "AMBER"
diff --git a/tests/data/suites_files/suites_1.json b/tests/data/suites_files/suites_1.json
new file mode 100644
index 0000000..96e0607
--- /dev/null
+++ b/tests/data/suites_files/suites_1.json
@@ -0,0 +1,59 @@
+{
+    "type": "TiffSuite",
+    "target": {
+        "id": "0001",
+        "files": [
+            {
+                "url": "syn://syn51585496",
+                "metadata": {
+                    "md5_checksum": "c7b08f6decb5e7572efbe6074926a843"
+                },
+                "type": "TIFF",
+                "name": "circuit.tif",
+                "local_path": "/tmp/dcqc-staged-7onezxv1/circuit.tif"
+            }
+        ],
+        "type": "SingleTarget"
+    },
+    "suite_status": {
+        "required_tests": [
+            "Md5ChecksumTest",
+            "LibTiffInfoTest",
+            "FileExtensionTest"
+        ],
+        "skipped_tests": [],
+        "status": "GREEN"
+    },
+    "tests": [
+        {
+            "type": "FileExtensionTest",
+            "tier": 1,
+            "is_external_test": false,
+            "status": "passed"
+        },
+        {
+            "type": "GrepDateTest",
+            "tier": 4,
+            "is_external_test": true,
+            "status": "passed"
+        },
+        {
+            "type": "LibTiffInfoTest",
+            "tier": 2,
+            "is_external_test": true,
+            "status": "passed"
+        },
+        {
+            "type": "Md5ChecksumTest",
+            "tier": 1,
+            "is_external_test": false,
+            "status": "passed"
+        },
+        {
+            "type": "TiffTag306DateTimeTest",
+            "tier": 4,
+            "is_external_test": true,
+            "status": "passed"
+        }
+    ]
+}
diff --git a/tests/data/suites_files/suites_2.json b/tests/data/suites_files/suites_2.json
new file mode 100644
index 0000000..1cc45bc
--- /dev/null
+++ b/tests/data/suites_files/suites_2.json
@@ -0,0 +1,59 @@
+{
+    "type": "TiffSuite",
+    "target": {
+        "id": "0002",
+        "files": [
+            {
+                "url": "syn://syn51585494",
+                "metadata": {
+                    "md5_checksum": "9cee1b0e8c4d051fabea82b62ae69404"
+                },
+                "type": "TIFF",
+                "name": "test_contains_word_date.tif",
+                "local_path": "/tmp/dcqc-staged-ddxo9fx2/test_contains_word_date.tif"
+            }
+        ],
+        "type": "SingleTarget"
+    },
+    "suite_status": {
+        "required_tests": [
+            "Md5ChecksumTest",
+            "LibTiffInfoTest",
+            "FileExtensionTest"
+        ],
+        "skipped_tests": [],
+        "status": "RED"
+    },
+    "tests": [
+        {
+            "type": "FileExtensionTest",
+            "tier": 1,
+            "is_external_test": false,
+            "status": "passed"
+        },
+        {
+            "type": "GrepDateTest",
+            "tier": 4,
+            "is_external_test": true,
+            "status": "failed"
+        },
+        {
+            "type": "LibTiffInfoTest",
+            "tier": 2,
+            "is_external_test": true,
+            "status": "failed"
+        },
+        {
+            "type": "Md5ChecksumTest",
+            "tier": 1,
+            "is_external_test": false,
+            "status": "passed"
+        },
+        {
+            "type": "TiffTag306DateTimeTest",
+            "tier": 4,
+            "is_external_test": true,
+            "status": "passed"
+        }
+    ]
+}
diff --git a/tests/data/suites_files/suites_3.json b/tests/data/suites_files/suites_3.json
new file mode 100644
index 0000000..80eca4a
--- /dev/null
+++ b/tests/data/suites_files/suites_3.json
@@ -0,0 +1,59 @@
+{
+    "type": "TiffSuite",
+    "target": {
+        "id": "0003",
+        "files": [
+            {
+                "url": "syn://syn51585495",
+                "metadata": {
+                    "md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268"
+                },
+                "type": "TIFF",
+                "name": "test_image_dirty_datetime.tif",
+                "local_path": "/tmp/dcqc-staged-5m6d8fdj/test_image_dirty_datetime.tif"
+            }
+        ],
+        "type": "SingleTarget"
+    },
+    "suite_status": {
+        "required_tests": [
+            "Md5ChecksumTest",
+            "LibTiffInfoTest",
+            "FileExtensionTest"
+        ],
+        "skipped_tests": [],
+        "status": "AMBER"
+    },
+    "tests": [
+        {
+            "type": "FileExtensionTest",
+            "tier": 1,
+            "is_external_test": false,
+            "status": "passed"
+        },
+        {
+            "type": "GrepDateTest",
+            "tier": 4,
+            "is_external_test": true,
+            "status": "passed"
+        },
+        {
+            "type": "LibTiffInfoTest",
+            "tier": 2,
+            "is_external_test": true,
+            "status": "passed"
+        },
+        {
+            "type": "Md5ChecksumTest",
+            "tier": 1,
+            "is_external_test": false,
+            "status": "passed"
+        },
+        {
+            "type": "TiffTag306DateTimeTest",
+            "tier": 4,
+            "is_external_test": true,
+            "status": "failed"
+        }
+    ]
+}

From dfbaad33f0f7386136e9b459d15dbc32a7a940b9 Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Tue, 13 Jun 2023 10:30:02 -0600
Subject: [PATCH 13/18] add back multi file testing dict

---
 tests/test_updaters.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_updaters.py b/tests/test_updaters.py
index e268069..b0c3e66 100644
--- a/tests/test_updaters.py
+++ b/tests/test_updaters.py
@@ -24,7 +24,9 @@ class TestCsvUpdater:
     mock_dict_multi = {
         "syn://syn51585496": "GREEN",
         "syn://syn51585494": "GREEN",
+        "syn://syn51585494": "RED",
         "syn://syn51585495": "GREEN",
+        "syn://syn51585495": "AMBER",
     }
 
     def __init__(self, get_data):

From 6bf487e1a964af5260207fff5b6b3f907612d8d3 Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Tue, 13 Jun 2023 11:49:30 -0600
Subject: [PATCH 14/18] updates test_updaters

---
 src/dcqc/updaters.py       | 24 ++++++-----
 tests/conftest.py          | 35 +++++++++++++++-
 tests/data/empty_input.csv |  1 +
 tests/test_updaters.py     | 83 ++++++++++++++++----------------------
 4 files changed, 83 insertions(+), 60 deletions(-)
 create mode 100644 tests/data/empty_input.csv

diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py
index b487916..048cc29 100644
--- a/src/dcqc/updaters.py
+++ b/src/dcqc/updaters.py
@@ -39,17 +39,19 @@ def update(self, suites: List[SuiteABC]):
         # Create CSV data structure
         row_list = []
         parser = CsvParser(self.input_path)
-        for row in parser.list_rows():
-            csv_data = row[1]
+        for _, csv_data in parser.list_rows():
             csv_data["dcqc_status"] = collapsed_dict[csv_data["url"]]
             row_list.append(csv_data)
 
-        keys = row_list[0].keys()
-        # Export updated CSV
-        self.output_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(
-            str(self.output_path), "w+", newline="", encoding="utf-8"
-        ) as output_file:
-            dict_writer = DictWriter(output_file, keys)
-            dict_writer.writeheader()
-            dict_writer.writerows(row_list)
+        if row_list:
+            keys = row_list[0].keys()
+            # Export updated CSV
+            self.output_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(
+                str(self.output_path), "w+", newline="", encoding="utf-8"
+            ) as output_file:
+                dict_writer = DictWriter(output_file, keys)
+                dict_writer.writeheader()
+                dict_writer.writerows(row_list)
+        else:
+            raise ValueError("No rows found in input CSV")
diff --git a/tests/conftest.py b/tests/conftest.py
index 6a19a94..d118eb9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,12 +10,13 @@
 from datetime import datetime
 from getpass import getuser
 from pathlib import Path
+from unittest.mock import MagicMock
 from uuid import uuid4
 
 import pytest
 
 from dcqc.file import File
-from dcqc.suites.suite_abc import SuiteABC
+from dcqc.suites.suite_abc import SuiteABC, SuiteStatus
 from dcqc.target import SingleTarget
 
 CNFPATH = Path(__file__).resolve()
@@ -129,3 +130,35 @@ def _get_output(filename: str) -> Path:
         return output
 
     yield _get_output
+
+
+@pytest.fixture
+def mocked_suites_single_targets():
+    mock_dict_single = {
+        "syn://syn51585496": SuiteStatus.GREEN,
+        "syn://syn51585494": SuiteStatus.RED,
+        "syn://syn51585495": SuiteStatus.AMBER,
+    }
+    mocked_suites = []
+    for url, status in mock_dict_single.items():
+        suite = MagicMock(cls=SuiteABC)
+        suite.target.files[0].url = url
+        suite.get_status.return_value = status
+        mocked_suites.append(suite)
+    return mocked_suites
+
+
+@pytest.fixture
+def mocked_suites_multi_targets():
+    mock_dict_multi = {
+        "syn://syn51585496": SuiteStatus.GREEN,
+        "syn://syn51585494": SuiteStatus.RED,
+        "syn://syn51585495": SuiteStatus.AMBER,
+    }
+    mocked_suites = []
+    for url, status in mock_dict_multi.items():
+        suite = MagicMock(cls=SuiteABC)
+        suite.target.files[0].url = url
+        suite.get_status.return_value = status
+        mocked_suites.append(suite)
+    return mocked_suites
diff --git a/tests/data/empty_input.csv b/tests/data/empty_input.csv
new file mode 100644
index 0000000..1255d77
--- /dev/null
+++ b/tests/data/empty_input.csv
@@ -0,0 +1 @@
+url,file_type,md5_checksum
diff --git a/tests/test_updaters.py b/tests/test_updaters.py
index b0c3e66..6bb2380 100644
--- a/tests/test_updaters.py
+++ b/tests/test_updaters.py
@@ -1,53 +1,40 @@
 import csv
-from unittest.mock import MagicMock
 
-from dcqc.suites.suite_abc import SuiteABC
+import pytest
+
 from dcqc.updaters import CsvUpdater
 
 
-def generate_mocked_suites(mock_dict):
-    mocked_suites = []
-    for url, status in mock_dict.items():
-        suite = MagicMock(SuiteABC)
-        suite.target.files[0].url = url
-        suite.get_status.return_value = status
-        mocked_suites.append(suite)
-        return mocked_suites
-
-
-class TestCsvUpdater:
-    mock_dict_single = {
-        "syn://syn51585496": "GREEN",
-        "syn://syn51585494": "RED",
-        "syn://syn51585495": "AMBER",
-    }
-    mock_dict_multi = {
-        "syn://syn51585496": "GREEN",
-        "syn://syn51585494": "GREEN",
-        "syn://syn51585494": "RED",
-        "syn://syn51585495": "GREEN",
-        "syn://syn51585495": "AMBER",
-    }
-
-    def __init__(self, get_data):
-        self.mocked_suites_single = generate_mocked_suites(self.mock_dict_single)
-        self.mocked_suites_multi = generate_mocked_suites(self.mock_dict_multi)
-        self.input_file = get_data("input.csv")
-        self.output_file = get_data("output.csv")
-        self.updater = CsvUpdater(self.input_file, self.output_file)
-
-    def get_dcqc_status_list_from_file(self, filename):
-        with open(filename, "r") as file:
-            reader = csv.DictReader(file)
-            status_list = [row["dcqc_status"] for row in reader]
-        return status_list
-
-    def test_that_csv_updater_updates_csv_as_expected_with_single_targets(self):
-        self.updater.update(self.mocked_suites_single)
-        status_list = self.get_dcqc_status_list_from_file(self.output_file)
-        assert status_list == ["GREEN", "RED", "AMBER"]
-
-    def test_that_csv_updater_updates_csv_as_expected_with_multi_targets(self):
-        self.updater.update(self.mocked_suites_multi)
-        status_list = self.get_dcqc_status_list_from_file(self.output_file)
-        assert status_list == ["GREEN", "RED", "AMBER"]
+def get_dcqc_status_list_from_file(filename):
+    with open(filename, "r") as file:
+        reader = csv.DictReader(file)
+        status_list = [row["dcqc_status"] for row in reader]
+    return status_list
+
+
+def test_that_csv_updater_updates_csv_as_expected_with_single_targets(
+    get_data, mocked_suites_single_targets
+):
+    input_file = get_data("input.csv")
+    output_file = get_data("output.csv")
+    updater = CsvUpdater(input_file, output_file)
+    updater.update(mocked_suites_single_targets)
+    status_list = get_dcqc_status_list_from_file(output_file)
+    assert status_list == ["GREEN", "RED", "AMBER"]
+
+
+def test_that_csv_updater_updates_csv_as_expected_with_multi_targets(
+    get_data, mocked_suites_multi_targets
+):
+    input_file = get_data("input.csv")
+    output_file = get_data("output.csv")
+    updater = CsvUpdater(input_file, output_file)
+    updater.update(mocked_suites_multi_targets)
+    status_list = get_dcqc_status_list_from_file(output_file)
+    assert status_list == ["GREEN", "RED", "AMBER"]
+
+
+def test_that_empty_input_manifest_raises_error(get_data, mocked_suites_single_targets):
+    with pytest.raises(ValueError):
+        empty_updater = CsvUpdater(get_data("empty_input.csv"), get_data("output.csv"))
+        empty_updater.update(mocked_suites_single_targets)

From e6d853cbf274feb3e17d2370f9023bb45dfe4bc5 Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Tue, 13 Jun 2023 13:59:45 -0600
Subject: [PATCH 15/18] adds TODO for multi-file

---
 src/dcqc/updaters.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py
index 048cc29..a9ad002 100644
--- a/src/dcqc/updaters.py
+++ b/src/dcqc/updaters.py
@@ -21,6 +21,7 @@ def __init__(self, input_path: Path, output_path: Path):
     def update(self, suites: List[SuiteABC]):
         suite_dict = defaultdict(list)
         # {url: [list_of_statuses]} data structure to allow for multi-file targets
+        # TODO add support for suites with multiple files in them (multi-file targets)
         for suite in suites:
             url = suite.target.files[0].url
             status = suite.get_status()

From c7262f647cf584123d5de5b4611d7bbd9127eeac Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Tue, 13 Jun 2023 14:15:58 -0600
Subject: [PATCH 16/18] comment out multi_target testing

---
 tests/conftest.py      | 28 ++++++++++++++--------------
 tests/test_updaters.py | 18 +++++++++---------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index d118eb9..a9bd72e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -148,17 +148,17 @@ def mocked_suites_single_targets():
     return mocked_suites
 
 
-@pytest.fixture
-def mocked_suites_multi_targets():
-    mock_dict_multi = {
-        "syn://syn51585496": SuiteStatus.GREEN,
-        "syn://syn51585494": SuiteStatus.RED,
-        "syn://syn51585495": SuiteStatus.AMBER,
-    }
-    mocked_suites = []
-    for url, status in mock_dict_multi.items():
-        suite = MagicMock(cls=SuiteABC)
-        suite.target.files[0].url = url
-        suite.get_status.return_value = status
-        mocked_suites.append(suite)
-    return mocked_suites
+# @pytest.fixture
+# def mocked_suites_multi_targets():
+#     mock_dict_multi = {
+#         "syn://syn51585496": SuiteStatus.GREEN,
+#         "syn://syn51585494": SuiteStatus.RED,
+#         "syn://syn51585495": SuiteStatus.AMBER,
+#     }
+#     mocked_suites = []
+#     for url, status in mock_dict_multi.items():
+#         suite = MagicMock(cls=SuiteABC)
+#         suite.target.files[0].url = url
+#         suite.get_status.return_value = status
+#         mocked_suites.append(suite)
+#     return mocked_suites
diff --git a/tests/test_updaters.py b/tests/test_updaters.py
index 6bb2380..9122a50 100644
--- a/tests/test_updaters.py
+++ b/tests/test_updaters.py
@@ -23,15 +23,15 @@ def test_that_csv_updater_updates_csv_as_expected_with_single_targets(
     assert status_list == ["GREEN", "RED", "AMBER"]
 
 
-def test_that_csv_updater_updates_csv_as_expected_with_multi_targets(
-    get_data, mocked_suites_multi_targets
-):
-    input_file = get_data("input.csv")
-    output_file = get_data("output.csv")
-    updater = CsvUpdater(input_file, output_file)
-    updater.update(mocked_suites_multi_targets)
-    status_list = get_dcqc_status_list_from_file(output_file)
-    assert status_list == ["GREEN", "RED", "AMBER"]
+# def test_that_csv_updater_updates_csv_as_expected_with_multi_targets(
+#     get_data, mocked_suites_multi_targets
+# ):
+#     input_file = get_data("input.csv")
+#     output_file = get_data("output.csv")
+#     updater = CsvUpdater(input_file, output_file)
+#     updater.update(mocked_suites_multi_targets)
+#     status_list = get_dcqc_status_list_from_file(output_file)
+#     assert status_list == ["GREEN", "RED", "AMBER"]
 
 
 def test_that_empty_input_manifest_raises_error(get_data, mocked_suites_single_targets):

From e9dffd0958fcc566af40c6328f858412b22a369d Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Tue, 13 Jun 2023 16:35:47 -0600
Subject: [PATCH 17/18] Removes unneeded None handling in `update`

---
 src/dcqc/updaters.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py
index a9ad002..0ecb9dc 100644
--- a/src/dcqc/updaters.py
+++ b/src/dcqc/updaters.py
@@ -33,10 +33,8 @@ def update(self, suites: List[SuiteABC]):
                 collapsed_dict[url] = "RED"
             elif "AMBER" in statuses:
                 collapsed_dict[url] = "AMBER"
-            elif "GREEN" in statuses:
-                collapsed_dict[url] = "GREEN"
             else:
-                collapsed_dict[url] = "NONE"
+                collapsed_dict[url] = "GREEN"
         # Create CSV data structure
         row_list = []
         parser = CsvParser(self.input_path)

From c21bc9b242ff6fd02f37d988bd644eb1e1536f02 Mon Sep 17 00:00:00 2001
From: Brad Macdonald <bwm03570@gmail.com>
Date: Tue, 13 Jun 2023 17:03:29 -0600
Subject: [PATCH 18/18] adds NONE check to updater test

---
 src/dcqc/updaters.py                       |  6 ++++--
 tests/conftest.py                          |  1 +
 tests/data/test_input.csv                  |  5 +++++
 tests/data/{output.csv => test_output.csv} |  1 +
 tests/test_updaters.py                     | 20 +++++++++++---------
 5 files changed, 22 insertions(+), 11 deletions(-)
 create mode 100644 tests/data/test_input.csv
 rename tests/data/{output.csv => test_output.csv} (77%)

diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py
index 0ecb9dc..6002643 100644
--- a/src/dcqc/updaters.py
+++ b/src/dcqc/updaters.py
@@ -21,7 +21,7 @@ def __init__(self, input_path: Path, output_path: Path):
     def update(self, suites: List[SuiteABC]):
         suite_dict = defaultdict(list)
         # {url: [list_of_statuses]} data structure to allow for multi-file targets
-        # TODO add support for suites with multiple files in them (multi-file targets)
+        # TODO add support for suites with multiple files in them (multi)
         for suite in suites:
             url = suite.target.files[0].url
             status = suite.get_status()
@@ -33,8 +33,10 @@ def update(self, suites: List[SuiteABC]):
                 collapsed_dict[url] = "RED"
             elif "AMBER" in statuses:
                 collapsed_dict[url] = "AMBER"
-            else:
+            elif "GREEN" in statuses:
                 collapsed_dict[url] = "GREEN"
+            else:
+                collapsed_dict[url] = "NONE"
         # Create CSV data structure
         row_list = []
         parser = CsvParser(self.input_path)
diff --git a/tests/conftest.py b/tests/conftest.py
index a9bd72e..69a6736 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -138,6 +138,7 @@ def mocked_suites_single_targets():
         "syn://syn51585496": SuiteStatus.GREEN,
         "syn://syn51585494": SuiteStatus.RED,
         "syn://syn51585495": SuiteStatus.AMBER,
+        "syn://syn51585493": SuiteStatus.NONE,
     }
     mocked_suites = []
     for url, status in mock_dict_single.items():
diff --git a/tests/data/test_input.csv b/tests/data/test_input.csv
new file mode 100644
index 0000000..6501a6a
--- /dev/null
+++ b/tests/data/test_input.csv
@@ -0,0 +1,5 @@
+url,file_type,md5_checksum
+syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9
+syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff
+syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9
+syn://syn51585493,TIFF,38b86a456d1f441008986c6f798d5ef9
diff --git a/tests/data/output.csv b/tests/data/test_output.csv
similarity index 77%
rename from tests/data/output.csv
rename to tests/data/test_output.csv
index ab1167f..e37ee65 100644
--- a/tests/data/output.csv
+++ b/tests/data/test_output.csv
@@ -2,3 +2,4 @@ url,file_type,md5_checksum,dcqc_status
 syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9,GREEN
 syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff,RED
 syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9,AMBER
+syn://syn51585493,TIFF,38b86a456d1f441008986c6f798d5ef9,NONE
diff --git a/tests/test_updaters.py b/tests/test_updaters.py
index 9122a50..65198fe 100644
--- a/tests/test_updaters.py
+++ b/tests/test_updaters.py
@@ -15,12 +15,20 @@ def get_dcqc_status_list_from_file(filename):
 def test_that_csv_updater_updates_csv_as_expected_with_single_targets(
     get_data, mocked_suites_single_targets
 ):
-    input_file = get_data("input.csv")
-    output_file = get_data("output.csv")
+    input_file = get_data("test_input.csv")
+    output_file = get_data("test_output.csv")
     updater = CsvUpdater(input_file, output_file)
     updater.update(mocked_suites_single_targets)
     status_list = get_dcqc_status_list_from_file(output_file)
-    assert status_list == ["GREEN", "RED", "AMBER"]
+    assert status_list == ["GREEN", "RED", "AMBER", "NONE"]
+
+
+def test_that_empty_input_manifest_raises_error(get_data, mocked_suites_single_targets):
+    with pytest.raises(ValueError):
+        empty_updater = CsvUpdater(
+            get_data("empty_input.csv"), get_data("test_output.csv")
+        )
+        empty_updater.update(mocked_suites_single_targets)
 
 
 # def test_that_csv_updater_updates_csv_as_expected_with_multi_targets(
@@ -32,9 +40,3 @@ def test_that_csv_updater_updates_csv_as_expected_with_single_targets(
 #     updater.update(mocked_suites_multi_targets)
 #     status_list = get_dcqc_status_list_from_file(output_file)
 #     assert status_list == ["GREEN", "RED", "AMBER"]
-
-
-def test_that_empty_input_manifest_raises_error(get_data, mocked_suites_single_targets):
-    with pytest.raises(ValueError):
-        empty_updater = CsvUpdater(get_data("empty_input.csv"), get_data("output.csv"))
-        empty_updater.update(mocked_suites_single_targets)