From 8d2109776bbb3e5eaf2c42536450cf8e0a4f578d Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Fri, 9 Jun 2023 15:37:58 -0600 Subject: [PATCH 01/18] adds update_csv command and test --- src/dcqc/main.py | 43 ++++++++++ tests/data/input.csv | 4 + tests/data/suites.json | 179 +++++++++++++++++++++++++++++++++++++++++ tests/test_main.py | 17 ++++ 4 files changed, 243 insertions(+) create mode 100644 tests/data/input.csv create mode 100644 tests/data/suites.json diff --git a/src/dcqc/main.py b/src/dcqc/main.py index a9da6b3..566e6be 100644 --- a/src/dcqc/main.py +++ b/src/dcqc/main.py @@ -204,3 +204,46 @@ def qc_file( report = JsonReport() suite_json = report.generate(suite) json.dump(suite_json, sys.stdout, indent=2) + + +@app.command() +def update_csv( + suites_file: Path = input_path_arg, + input_file: Path = input_path_arg, + output_file: Path = output_path_arg, +): + """Update input CSV file with dcqc_status column""" + suites = JsonParser.parse_objects(suites_file, SuiteABC) + suite_dict = {} + for suite in suites: + url = suite.target.files[0].url + status = suite._status.value + if not suite_dict.get(url): + suite_dict[url] = [status] + else: + suite_dict[url].append(status) + + collapsed_dict = {} + for url, statuses in suite_dict.items(): + if "RED" in statuses: + collapsed_dict[url] = "RED" + elif "AMBER" in statuses: + collapsed_dict[url] = "AMBER" + elif "GREEN" in statuses: + collapsed_dict[url] = "GREEN" + else: + collapsed_dict[url] = "NONE" + + parser = CsvParser(input_file) + row_list = [] + for row in parser.list_rows(): + row_list.append(row[1]) + for row in row_list: + row["dcqc_status"] = collapsed_dict[row["url"]] + + keys = row_list[0].keys() + + with open(output_file, "w", newline="", encoding="utf-8") as output_file: + dict_writer = DictWriter(output_file, keys) + dict_writer.writeheader() + dict_writer.writerows(row_list) diff --git a/tests/data/input.csv b/tests/data/input.csv new file mode 100644 index 0000000..946fcf8 --- /dev/null +++ b/tests/data/input.csv @@ -0,0 +1,4 @@ +url,file_type,md5_checksum +syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9 +syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff +syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9 diff --git a/tests/data/suites.json b/tests/data/suites.json new file mode 100644 index 0000000..107a5c6 --- /dev/null +++ b/tests/data/suites.json @@ -0,0 +1,179 @@ +[ + { + "type": "TiffSuite", + "target": { + "id": "0001", + "files": [ + { + "url": "syn://syn51585496", + "metadata": { + "md5_checksum": "c7b08f6decb5e7572efbe6074926a843" + }, + "type": "TIFF", + "name": "circuit.tif", + "local_path": "/tmp/dcqc-staged-7onezxv1/circuit.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "LibTiffInfoTest", + "FileExtensionTest" + ], + "skipped_tests": [], + "status": "GREEN" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "passed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + } + ] + }, + { + "type": "TiffSuite", + "target": { + "id": "0002", + "files": [ + { + "url": "syn://syn51585494", + "metadata": { + "md5_checksum": "9cee1b0e8c4d051fabea82b62ae69404" + }, + "type": "TIFF", + "name": "test_contains_word_date.tif", + "local_path": "/tmp/dcqc-staged-ddxo9fx2/test_contains_word_date.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "LibTiffInfoTest", + "FileExtensionTest" + ], + "skipped_tests": [], + "status": "RED" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "failed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "failed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + } + ] + }, + { + "type": "TiffSuite", + "target": { + "id": "0003", + "files": [ + { + "url": "syn://syn51585495", + "metadata": { + "md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268" + }, + "type": "TIFF", + "name": "test_image_dirty_datetime.tif", + "local_path": "/tmp/dcqc-staged-5m6d8fdj/test_image_dirty_datetime.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "LibTiffInfoTest", + "FileExtensionTest" + ], + "skipped_tests": [], + "status": "AMBER" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "passed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "failed" + } + ] + } +] diff --git a/tests/test_main.py b/tests/test_main.py index 04218b4..62e90fe 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -134,3 +134,20 @@ def test_qc_file(get_data): ] result = run_command(args) check_command_result(result) + + +def test_update_csv(get_data, get_output): + suites_path = get_data("suites.json") + input_path = get_data("input.csv") + output_path = get_output("update_csv") / "output.csv" + output_path.unlink(missing_ok=True) + + args = [ + "update-csv", + suites_path, + input_path, + output_path, + ] + result = run_command(args) + check_command_result(result) + assert output_path.exists() From c08e61612425a8d98753934ce543b1d14874d29c Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Mon, 12 Jun 2023 11:22:48 -0600 Subject: [PATCH 02/18] creates csv updater --- src/dcqc/main.py | 36 +++---------------------- src/dcqc/suites/suite_abc.py | 6 +++++ src/dcqc/updaters.py | 52 ++++++++++++++++++++++++++++++++++++ tests/data/output.csv | 4 +++ 4 files changed, 65 insertions(+), 33 deletions(-) create mode 100644 src/dcqc/updaters.py create mode 100644 tests/data/output.csv diff --git a/src/dcqc/main.py b/src/dcqc/main.py index 566e6be..72583a8 100644 --- a/src/dcqc/main.py +++ b/src/dcqc/main.py @@ -13,6 +13,7 @@ from dcqc.suites.suite_abc import SuiteABC from dcqc.target import SingleTarget from dcqc.tests.base_test import BaseTest, ExternalTestMixin +from dcqc.updaters import CsvUpdater # Make commands optional to allow for `dcqc --version` app = Typer(invoke_without_command=True) @@ -214,36 +215,5 @@ def update_csv( ): """Update input CSV file with dcqc_status column""" suites = JsonParser.parse_objects(suites_file, SuiteABC) - suite_dict = {} - for suite in suites: - url = suite.target.files[0].url - status = suite._status.value - if not suite_dict.get(url): - suite_dict[url] = [status] - else: - suite_dict[url].append(status) - - collapsed_dict = {} - for url, statuses in suite_dict.items(): - if "RED" in statuses: - collapsed_dict[url] = "RED" - elif "AMBER" in statuses: - collapsed_dict[url] = "AMBER" - elif "GREEN" in statuses: - collapsed_dict[url] = "GREEN" - else: - collapsed_dict[url] = "NONE" - - parser = CsvParser(input_file) - row_list = [] - for row in parser.list_rows(): - row_list.append(row[1]) - for row in row_list: - row["dcqc_status"] = collapsed_dict[row["url"]] - - keys = row_list[0].keys() - - with open(output_file, "w", newline="", encoding="utf-8") as output_file: - dict_writer = DictWriter(output_file, keys) - dict_writer.writeheader() - dict_writer.writerows(row_list) + updater = CsvUpdater(input_file, output_file) + updater.update(suites) diff --git a/src/dcqc/suites/suite_abc.py b/src/dcqc/suites/suite_abc.py index ed6b51e..76c91ae 100644 --- a/src/dcqc/suites/suite_abc.py +++ b/src/dcqc/suites/suite_abc.py @@ -292,3 +292,9 @@ def from_dict(cls, dictionary: SerializedObject) -> SuiteABC: def get_base_class(cls): """Retrieve base class.""" return SuiteABC + + def get_status(self, compute_ok: bool = True) -> SuiteStatus: + """Compute (if applicable) and return the suite status.""" + if self._status == SuiteStatus.NONE and compute_ok: + self._status = self.compute_status() + return self._status diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py new file mode 100644 index 0000000..3b73c06 --- /dev/null +++ b/src/dcqc/updaters.py @@ -0,0 +1,52 @@ +from csv import DictWriter +from pathlib import Path +from dcqc.parsers import CsvParser, JsonParser +from dcqc.suites.suite_abc import SuiteABC +from typing import List, Dict + + +class CsvUpdater: + input_path: Path + output_path: Path + parser: CsvParser + + def __init__(self, input_path: Path, output_path: Path): + self.input_path = input_path + self.output_path = output_path + self.parser = CsvParser(input_path) + + def update(self, suites: List[SuiteABC]): + suite_dict: Dict[str, List[str]] = {} # mypy made me do this, not sure why + for suite in suites: + url = suite.target.files[0].url + status = suite.get_status() + if not suite_dict.get(url): + suite_dict[url] = [status.value] + else: + suite_dict[url].append(status.value) + + collapsed_dict = {} + for url, statuses in suite_dict.items(): + if "RED" in statuses: + collapsed_dict[url] = "RED" + elif "AMBER" in statuses: + collapsed_dict[url] = "AMBER" + elif "GREEN" in statuses: + collapsed_dict[url] = "GREEN" + else: + collapsed_dict[url] = "NONE" + + row_list = [] + for row in self.parser.list_rows(): + csv_data = row[1] + csv_data["dcqc_status"] = collapsed_dict[csv_data["url"]] + row_list.append(csv_data) + + keys = row_list[0].keys() + + with open( + str(self.output_path), "w", newline="", encoding="utf-8" + ) as output_file: + dict_writer = DictWriter(output_file, keys) + dict_writer.writeheader() + dict_writer.writerows(row_list) diff --git a/tests/data/output.csv b/tests/data/output.csv new file mode 100644 index 0000000..ab1167f --- /dev/null +++ b/tests/data/output.csv @@ -0,0 +1,4 @@ +url,file_type,md5_checksum,dcqc_status +syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9,GREEN +syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff,RED +syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9,AMBER From 4a0f4010db8746a71dd79cac8f1bba6e906b003f Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Mon, 12 Jun 2023 14:30:35 -0600 Subject: [PATCH 03/18] adds CsvUpdater test --- src/dcqc/updaters.py | 17 ++++++++++------- tests/test_updaters.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 7 deletions(-) create mode 100644 tests/test_updaters.py diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py index 3b73c06..3986912 100644 --- a/src/dcqc/updaters.py +++ b/src/dcqc/updaters.py @@ -1,8 +1,9 @@ from csv import DictWriter from pathlib import Path -from dcqc.parsers import CsvParser, JsonParser +from typing import Dict, List + +from dcqc.parsers import CsvParser from dcqc.suites.suite_abc import SuiteABC -from typing import List, Dict class CsvUpdater: @@ -11,12 +12,14 @@ class CsvUpdater: parser: CsvParser def __init__(self, input_path: Path, output_path: Path): - self.input_path = input_path self.output_path = output_path self.parser = CsvParser(input_path) def update(self, suites: List[SuiteABC]): - suite_dict: Dict[str, List[str]] = {} # mypy made me do this, not sure why + suite_dict: Dict[ + str, List[str] + ] = {} # mypy made me do this, but only here. not sure why + # {url: [list_of_statuses]} data structure to allow for multi-file targets for suite in suites: url = suite.target.files[0].url status = suite.get_status() @@ -24,7 +27,7 @@ def update(self, suites: List[SuiteABC]): suite_dict[url] = [status.value] else: suite_dict[url].append(status.value) - + # Evaluate dcqc_status for each url collapsed_dict = {} for url, statuses in suite_dict.items(): if "RED" in statuses: @@ -35,7 +38,7 @@ def update(self, suites: List[SuiteABC]): collapsed_dict[url] = "GREEN" else: collapsed_dict[url] = "NONE" - + # Create CSV data structure row_list = [] for row in self.parser.list_rows(): csv_data = row[1] @@ -43,7 +46,7 @@ def update(self, suites: List[SuiteABC]): row_list.append(csv_data) keys = row_list[0].keys() - + # Export updated CSV with open( str(self.output_path), "w", newline="", encoding="utf-8" ) as output_file: diff --git a/tests/test_updaters.py b/tests/test_updaters.py new file mode 100644 index 0000000..109ac65 --- /dev/null +++ b/tests/test_updaters.py @@ -0,0 +1,37 @@ +import csv + +from unittest.mock import MagicMock + +from dcqc.updaters import CsvUpdater +from pathlib import Path +from dcqc.suites.suite_abc import SuiteABC + + +class TestCsvUpdater: + mock_dict = { + "syn://syn51585496": "GREEN", + "syn://syn51585494": "RED", + "syn://syn51585495": "AMBER", + } + + def __init__(self): + self.mocked_suites = [] + for url, status in self.mock_dict.items(): + suite = MagicMock(SuiteABC) + suite.target.files[0].url = url + suite.get_status.return_value = status + self.mocked_suites.append(suite) + + def get_dcqc_status_list_from_file(self, filename): + with open(filename, "r") as file: + reader = csv.DictReader(file) + status_list = [row["dcqc_status"] for row in reader] + return status_list + + def test_that_csv_updater_updates_csv_as_expected(self, get_data): + input_file = get_data("input.csv") + output_file = get_data("output.csv") + updater = CsvUpdater(input_file, output_file) + updater.update(self.mocked_suites) + status_list = self.get_dcqc_status_list_from_file(output_file) + assert status_list == ["GREEN", "RED", "AMBER"] From 59d9825cebc70e33525f3c421e890466fd1540c0 Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Mon, 12 Jun 2023 14:31:05 -0600 Subject: [PATCH 04/18] removes not needed deps --- tests/test_updaters.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_updaters.py b/tests/test_updaters.py index 109ac65..5a4af65 100644 --- a/tests/test_updaters.py +++ b/tests/test_updaters.py @@ -3,7 +3,6 @@ from unittest.mock import MagicMock from dcqc.updaters import CsvUpdater -from pathlib import Path from dcqc.suites.suite_abc import SuiteABC From 1e93df9a71df2db1bbf627cbe3a91afab9d9a74c Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Mon, 12 Jun 2023 14:32:57 -0600 Subject: [PATCH 05/18] update after pre-commit --- tests/test_updaters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_updaters.py b/tests/test_updaters.py index 5a4af65..2ba24a8 100644 --- a/tests/test_updaters.py +++ b/tests/test_updaters.py @@ -1,9 +1,8 @@ import csv - from unittest.mock import MagicMock -from dcqc.updaters import CsvUpdater from dcqc.suites.suite_abc import SuiteABC +from dcqc.updaters import CsvUpdater class TestCsvUpdater: From 7b433d3439e9de6663a19a3103440dc08abd6e4e Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Mon, 12 Jun 2023 15:47:12 -0600 Subject: [PATCH 06/18] adds coverage for test_suites --- tests/test_suites.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_suites.py b/tests/test_suites.py index 46b4a59..a086daf 100644 --- a/tests/test_suites.py +++ b/tests/test_suites.py @@ -1,5 +1,7 @@ import pytest +from unittest.mock import patch + from dcqc.file import FileType from dcqc.suites.suite_abc import SuiteABC, SuiteStatus from dcqc.suites.suites import FileSuite, OmeTiffSuite, TiffSuite @@ -124,3 +126,16 @@ def test_that_a_suite_will_consider_required_tests_when_passing(test_targets): suite = SuiteABC.from_target(target, required_tests) suite_status = suite.compute_status() assert suite_status == SuiteStatus.GREEN + + +def test_that_status_is_computed_if_not_already_assigned(test_targets): + with patch.object( + SuiteABC, "compute_status", return_value=SuiteStatus.GREEN + ) as patch_compute_status: + target = test_targets["good"] + required_tests = ["Md5ChecksumTest"] + suite = SuiteABC.from_target(target, required_tests) + suite._status = SuiteStatus.NONE + suite_status = suite.get_status() + assert suite_status == SuiteStatus.GREEN + patch_compute_status.assert_called_once() From 664882dc956884ee3bce4aed170e30c42f9e5287 Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Mon, 12 Jun 2023 16:09:58 -0600 Subject: [PATCH 07/18] updates test_updaters --- tests/test_suites.py | 4 ++-- tests/test_updaters.py | 44 ++++++++++++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/tests/test_suites.py b/tests/test_suites.py index a086daf..9f96a5c 100644 --- a/tests/test_suites.py +++ b/tests/test_suites.py @@ -1,7 +1,7 @@ -import pytest - from unittest.mock import patch +import pytest + from dcqc.file import FileType from dcqc.suites.suite_abc import SuiteABC, SuiteStatus from dcqc.suites.suites import FileSuite, OmeTiffSuite, TiffSuite diff --git a/tests/test_updaters.py b/tests/test_updaters.py index 2ba24a8..e268069 100644 --- a/tests/test_updaters.py +++ b/tests/test_updaters.py @@ -5,20 +5,34 @@ from dcqc.updaters import CsvUpdater +def generate_mocked_suites(mock_dict): + mocked_suites = [] + for url, status in mock_dict.items(): + suite = MagicMock(SuiteABC) + suite.target.files[0].url = url + suite.get_status.return_value = status + mocked_suites.append(suite) + return mocked_suites + + class TestCsvUpdater: - mock_dict = { + mock_dict_single = { "syn://syn51585496": "GREEN", "syn://syn51585494": "RED", "syn://syn51585495": "AMBER", } + mock_dict_multi = { + "syn://syn51585496": "GREEN", + "syn://syn51585494": "GREEN", + "syn://syn51585495": "GREEN", + } - def __init__(self): - self.mocked_suites = [] - for url, status in self.mock_dict.items(): - suite = MagicMock(SuiteABC) - suite.target.files[0].url = url - suite.get_status.return_value = status - self.mocked_suites.append(suite) + def __init__(self, get_data): + self.mocked_suites_single = generate_mocked_suites(self.mock_dict_single) + self.mocked_suites_multi = generate_mocked_suites(self.mock_dict_multi) + self.input_file = get_data("input.csv") + self.output_file = get_data("output.csv") + self.updater = CsvUpdater(self.input_file, self.output_file) def get_dcqc_status_list_from_file(self, filename): with open(filename, "r") as file: @@ -26,10 +40,12 @@ def get_dcqc_status_list_from_file(self, filename): status_list = [row["dcqc_status"] for row in reader] return status_list - def test_that_csv_updater_updates_csv_as_expected(self, get_data): - input_file = get_data("input.csv") - output_file = get_data("output.csv") - updater = CsvUpdater(input_file, output_file) - updater.update(self.mocked_suites) - status_list = self.get_dcqc_status_list_from_file(output_file) + def test_that_csv_updater_updates_csv_as_expected_with_single_targets(self): + self.updater.update(self.mocked_suites_single) + status_list = self.get_dcqc_status_list_from_file(self.output_file) + assert status_list == ["GREEN", "RED", "AMBER"] + + def test_that_csv_updater_updates_csv_as_expected_with_multi_targets(self): + self.updater.update(self.mocked_suites_multi) + status_list = self.get_dcqc_status_list_from_file(self.output_file) assert status_list == ["GREEN", "RED", "AMBER"] From 189d2f4e2e2861ac23e7e3fff0c33cc4c8bf0441 Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Mon, 12 Jun 2023 16:54:27 -0600 Subject: [PATCH 08/18] updates updater to handle directory --- src/dcqc/updaters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py index 3986912..85f579f 100644 --- a/src/dcqc/updaters.py +++ b/src/dcqc/updaters.py @@ -47,8 +47,9 @@ def update(self, suites: List[SuiteABC]): keys = row_list[0].keys() # Export updated CSV + self.output_path.parent.mkdir(parents=True, exist_ok=True) with open( - str(self.output_path), "w", newline="", encoding="utf-8" + str(self.output_path), "w+", newline="", encoding="utf-8" ) as output_file: dict_writer = DictWriter(output_file, keys) dict_writer.writeheader() From 95e09b172562fee7de7742cf86d8fe269aff08de Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Tue, 13 Jun 2023 09:19:14 -0600 Subject: [PATCH 09/18] remove compute_ok --- src/dcqc/suites/suite_abc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dcqc/suites/suite_abc.py b/src/dcqc/suites/suite_abc.py index 76c91ae..38e08a4 100644 --- a/src/dcqc/suites/suite_abc.py +++ b/src/dcqc/suites/suite_abc.py @@ -293,8 +293,8 @@ def get_base_class(cls): """Retrieve base class.""" return SuiteABC - def get_status(self, compute_ok: bool = True) -> SuiteStatus: + def get_status(self) -> SuiteStatus: """Compute (if applicable) and return the suite status.""" - if self._status == SuiteStatus.NONE and compute_ok: + if self._status == SuiteStatus.NONE: self._status = self.compute_status() return self._status From 0d0f788407427ebb1a72e201ddcd8fda8ebd995e Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Tue, 13 Jun 2023 10:02:58 -0600 Subject: [PATCH 10/18] implement defaultdict --- src/dcqc/updaters.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py index 85f579f..b487916 100644 --- a/src/dcqc/updaters.py +++ b/src/dcqc/updaters.py @@ -1,11 +1,14 @@ +from collections import defaultdict from csv import DictWriter +from dataclasses import dataclass from pathlib import Path -from typing import Dict, List +from typing import List from dcqc.parsers import CsvParser from dcqc.suites.suite_abc import SuiteABC +@dataclass class CsvUpdater: input_path: Path output_path: Path @@ -13,20 +16,15 @@ class CsvUpdater: def __init__(self, input_path: Path, output_path: Path): self.output_path = output_path - self.parser = CsvParser(input_path) + self.input_path = input_path def update(self, suites: List[SuiteABC]): - suite_dict: Dict[ - str, List[str] - ] = {} # mypy made me do this, but only here. not sure why + suite_dict = defaultdict(list) # {url: [list_of_statuses]} data structure to allow for multi-file targets for suite in suites: url = suite.target.files[0].url status = suite.get_status() - if not suite_dict.get(url): - suite_dict[url] = [status.value] - else: - suite_dict[url].append(status.value) + suite_dict[url].append(status.value) # Evaluate dcqc_status for each url collapsed_dict = {} for url, statuses in suite_dict.items(): @@ -40,7 +38,8 @@ def update(self, suites: List[SuiteABC]): collapsed_dict[url] = "NONE" # Create CSV data structure row_list = [] - for row in self.parser.list_rows(): + parser = CsvParser(self.input_path) + for row in parser.list_rows(): csv_data = row[1] csv_data["dcqc_status"] = collapsed_dict[csv_data["url"]] row_list.append(csv_data) From d35b87e181aea930684a875b17746e73a8c01611 Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Tue, 13 Jun 2023 10:07:00 -0600 Subject: [PATCH 11/18] Merge branch 'main' into bwmac/orca-175/add_csv_updater --- src/dcqc/suites/suite_abc.py | 3 +- src/dcqc/suites/suites.py | 1 + src/dcqc/target.py | 26 +++++---- src/dcqc/tests/__init__.py | 1 + src/dcqc/tests/paired_fastq_parity_test.py | 59 +++++++++++++++++++++ tests/conftest.py | 5 ++ tests/data/fastq1.fastq | 8 +++ tests/data/fastq2.fastq.gz | Bin 0 -> 58 bytes tests/data/generate.py | 1 + tests/data/suite.json | 2 +- tests/data/target.json | 2 +- tests/data/test.computed.json | 2 +- tests/data/test.external.json | 2 +- tests/data/test.internal.json | 2 +- tests/data/tests.json | 6 +-- tests/test_target.py | 22 +++++++- tests/test_tests.py | 33 +++++++++++- 17 files changed, 154 insertions(+), 21 deletions(-) create mode 100644 src/dcqc/tests/paired_fastq_parity_test.py create mode 100644 tests/data/fastq1.fastq create mode 100644 tests/data/fastq2.fastq.gz diff --git a/src/dcqc/suites/suite_abc.py b/src/dcqc/suites/suite_abc.py index 38e08a4..36a715d 100644 --- a/src/dcqc/suites/suite_abc.py +++ b/src/dcqc/suites/suite_abc.py @@ -228,7 +228,7 @@ def compute_status(self) -> SuiteStatus: if test_name in self.required_tests: if test_status == TestStatus.FAIL: self._status = SuiteStatus.RED - break + return self._status else: if test_status == TestStatus.FAIL: self._status = SuiteStatus.AMBER @@ -236,7 +236,6 @@ def compute_status(self) -> SuiteStatus: def to_dict(self) -> SerializedObject: suite_status = self.compute_status() - print(suite_status) test_dicts = [] for test in self.tests: test_dict = test.to_dict() diff --git a/src/dcqc/suites/suites.py b/src/dcqc/suites/suites.py index 33535e5..9227984 100644 --- a/src/dcqc/suites/suites.py +++ b/src/dcqc/suites/suites.py @@ -44,3 +44,4 @@ class BAMSuite(FileSuite): class FastqSuite(FileSuite): file_type = FileType.get_file_type("FASTQ") + add_tests = (tests.PairedFastqParityTest,) diff --git a/src/dcqc/target.py b/src/dcqc/target.py index 4de370f..64ae1b7 100644 --- a/src/dcqc/target.py +++ b/src/dcqc/target.py @@ -105,16 +105,9 @@ def __post_init__(self, file_or_files: File | list[File]): # switching to @pydantic.dataclasses.dataclass: # test_that_paths_are_unchanged_when_not_using_serialize_paths_relative_to def ensure_single_file(self): - """Ensure that target is only initialized with a single file. - - Args: - value: List of files. - - Returns: - List of files. - """ + """Ensure that target is only initialized with a single file.""" if len(self.files) != 1: - raise ValueError("Target is restricted to single files") + raise ValueError("SingleTarget is restricted to single files") @property def file(self): @@ -128,3 +121,18 @@ def get_file_type(self) -> FileType: The file type object. """ return self.file.get_file_type() + + +@dataclass(init=False) +class PairedTarget(BaseTarget): + """Paired (two-file) target.""" + + def __post_init__(self, file_or_files: File | list[File]): + """Run validation checks after initialization.""" + super().__post_init__(file_or_files) + self.ensure_two_files() + + def ensure_two_files(self): + """Ensure that target is only initialized with two files.""" + if len(self.files) != 2: + raise ValueError("PairedTarget is restricted to two files") diff --git a/src/dcqc/tests/__init__.py b/src/dcqc/tests/__init__.py index f38bcb0..dd5e220 100644 --- a/src/dcqc/tests/__init__.py +++ b/src/dcqc/tests/__init__.py @@ -7,4 +7,5 @@ from dcqc.tests.libtiff_info_test import LibTiffInfoTest from dcqc.tests.md5_checksum_test import Md5ChecksumTest from dcqc.tests.ome_xml_schema_test import OmeXmlSchemaTest +from dcqc.tests.paired_fastq_parity_test import PairedFastqParityTest from dcqc.tests.tiff_tag_306_date_time_test import TiffTag306DateTimeTest diff --git a/src/dcqc/tests/paired_fastq_parity_test.py b/src/dcqc/tests/paired_fastq_parity_test.py new file mode 100644 index 0000000..3e9f85e --- /dev/null +++ b/src/dcqc/tests/paired_fastq_parity_test.py @@ -0,0 +1,59 @@ +import gzip +from pathlib import Path +from typing import TextIO + +from dcqc.target import PairedTarget +from dcqc.tests.base_test import InternalBaseTest, TestStatus + + +class PairedFastqParityTest(InternalBaseTest): + tier = 2 + target: PairedTarget + + def compute_status(self) -> TestStatus: + """Compute test status.""" + counts = list() + for file in self.target.files: + path = file.stage() + try: + count = self._count_fastq_lines(path) + except Exception: + return TestStatus.FAIL + counts.append(count) + + # Check that there counts are all the same (i.e., equal) + if len(set(counts)) <= 1: + status = TestStatus.PASS + else: + status = TestStatus.FAIL + return status + + def _count_fastq_lines(self, path: Path) -> int: + """Count the number of lines in a FASTQ file. + + Args: + path: Path to the FASTQ file. + + Returns: + Number of lines in the given FASTQ file. + """ + # Source: https://stackoverflow.com/a/1019572/21077945 + with self._open_fastq(path) as fastq: + num_lines = sum(1 for _ in fastq) + return num_lines + + def _open_fastq(self, path: Path) -> TextIO: + """Open a FASTQ file regardless of compression. + + Args: + path: Path to the FASTQ file. + + Returns: + Opened FASTQ file (in text mode). + """ + # TODO: This logic should ideally live in the File class, and a + # test should confirm the integrity of compressed files + if path.name.endswith(".gz"): + return gzip.open(path, "rt") + else: + return path.open("rt") diff --git a/tests/conftest.py b/tests/conftest.py index d4336c7..6a19a94 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -56,6 +56,8 @@ def test_files(get_data): txt_path = get_data("test.txt") jsonld_path = get_data("example.jsonld") tiff_path = get_data("circuit.tif") + fastq1_path = get_data("fastq1.fastq") + fastq2_path = get_data("fastq2.fastq.gz") syn_path = "syn://syn50555279" tiff_dirty_datetime_path = get_data("test_image_dirty_datetime.tif") good_metadata = { @@ -74,6 +76,7 @@ def test_files(get_data): "file_type": "tiff", "md5_checksum": "c7b08f6decb5e7572efbe6074926a843", } + fastq_metadata = {"file_type": "fastq"} tiff_dirty_datetime_metadata = { "file_type": "tiff", "md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268", @@ -82,6 +85,8 @@ def test_files(get_data): "good": File(txt_path.as_posix(), good_metadata), "bad": File(txt_path.as_posix(), bad_metadata), "tiff": File(tiff_path.as_posix(), tiff_metadata), + "fastq1": File(fastq1_path.as_posix(), fastq_metadata), + "fastq2": File(fastq2_path.as_posix(), fastq_metadata), "jsonld": File(jsonld_path.as_posix(), jsonld_metadata), "synapse": File(syn_path, good_metadata), "tiff_dirty_datetime": File( diff --git a/tests/data/fastq1.fastq b/tests/data/fastq1.fastq new file mode 100644 index 0000000..535d2b0 --- /dev/null +++ b/tests/data/fastq1.fastq @@ -0,0 +1,8 @@ +1 +2 +3 +4 +5 +6 +7 +8 diff --git a/tests/data/fastq2.fastq.gz b/tests/data/fastq2.fastq.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0b52ac8bfa701248b39cd1037605450a9e47f35 GIT binary patch literal 58 zcmb2|=HLk0(~!czoR(NzQfQ Date: Tue, 13 Jun 2023 10:20:33 -0600 Subject: [PATCH 12/18] updates generate.py to include suites.json --- tests/data/generate.py | 13 ++++++ tests/data/suites.json | 12 +++--- tests/data/suites_files/suites_1.json | 59 +++++++++++++++++++++++++++ tests/data/suites_files/suites_2.json | 59 +++++++++++++++++++++++++++ tests/data/suites_files/suites_3.json | 59 +++++++++++++++++++++++++++ 5 files changed, 196 insertions(+), 6 deletions(-) create mode 100644 tests/data/suites_files/suites_1.json create mode 100644 tests/data/suites_files/suites_2.json create mode 100644 tests/data/suites_files/suites_3.json diff --git a/tests/data/generate.py b/tests/data/generate.py index 1ab489f..4e4ef3c 100755 --- a/tests/data/generate.py +++ b/tests/data/generate.py @@ -11,6 +11,7 @@ from dcqc import tests from dcqc.file import File from dcqc.mixins import SerializableMixin +from dcqc.parsers import JsonParser from dcqc.reports import JsonReport from dcqc.suites.suite_abc import SuiteABC from dcqc.target import SingleTarget @@ -60,3 +61,15 @@ def export(obj: SerializableMixin | Sequence[SerializableMixin], filename: str): skipped_tests = ["LibTiffInfoTest"] suite = SuiteABC.from_tests(suite_tests, required_tests, skipped_tests) export(suite, "suite.json") + +# suites.json +input_jsons = [ + Path(file_path) + for file_path in [ + "tests/data/suites_files/suites_1.json", + "tests/data/suites_files/suites_2.json", + "tests/data/suites_files/suites_3.json", + ] +] +suites = [JsonParser.parse_object(json_, SuiteABC) for json_ in input_jsons] +export(suites, "suites.json") diff --git a/tests/data/suites.json b/tests/data/suites.json index 107a5c6..aaf72e4 100644 --- a/tests/data/suites.json +++ b/tests/data/suites.json @@ -19,8 +19,8 @@ "suite_status": { "required_tests": [ "Md5ChecksumTest", - "LibTiffInfoTest", - "FileExtensionTest" + "FileExtensionTest", + "LibTiffInfoTest" ], "skipped_tests": [], "status": "GREEN" @@ -78,8 +78,8 @@ "suite_status": { "required_tests": [ "Md5ChecksumTest", - "LibTiffInfoTest", - "FileExtensionTest" + "FileExtensionTest", + "LibTiffInfoTest" ], "skipped_tests": [], "status": "RED" @@ -137,8 +137,8 @@ "suite_status": { "required_tests": [ "Md5ChecksumTest", - "LibTiffInfoTest", - "FileExtensionTest" + "FileExtensionTest", + "LibTiffInfoTest" ], "skipped_tests": [], "status": "AMBER" diff --git a/tests/data/suites_files/suites_1.json b/tests/data/suites_files/suites_1.json new file mode 100644 index 0000000..96e0607 --- /dev/null +++ b/tests/data/suites_files/suites_1.json @@ -0,0 +1,59 @@ +{ + "type": "TiffSuite", + "target": { + "id": "0001", + "files": [ + { + "url": "syn://syn51585496", + "metadata": { + "md5_checksum": "c7b08f6decb5e7572efbe6074926a843" + }, + "type": "TIFF", + "name": "circuit.tif", + "local_path": "/tmp/dcqc-staged-7onezxv1/circuit.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "LibTiffInfoTest", + "FileExtensionTest" + ], + "skipped_tests": [], + "status": "GREEN" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "passed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + } + ] +} diff --git a/tests/data/suites_files/suites_2.json b/tests/data/suites_files/suites_2.json new file mode 100644 index 0000000..1cc45bc --- /dev/null +++ b/tests/data/suites_files/suites_2.json @@ -0,0 +1,59 @@ +{ + "type": "TiffSuite", + "target": { + "id": "0002", + "files": [ + { + "url": "syn://syn51585494", + "metadata": { + "md5_checksum": "9cee1b0e8c4d051fabea82b62ae69404" + }, + "type": "TIFF", + "name": "test_contains_word_date.tif", + "local_path": "/tmp/dcqc-staged-ddxo9fx2/test_contains_word_date.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "LibTiffInfoTest", + "FileExtensionTest" + ], + "skipped_tests": [], + "status": "RED" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "failed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "failed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + } + ] +} diff --git a/tests/data/suites_files/suites_3.json b/tests/data/suites_files/suites_3.json new file mode 100644 index 0000000..80eca4a --- /dev/null +++ b/tests/data/suites_files/suites_3.json @@ -0,0 +1,59 @@ +{ + "type": "TiffSuite", + "target": { + "id": "0003", + "files": [ + { + "url": "syn://syn51585495", + "metadata": { + "md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268" + }, + "type": "TIFF", + "name": "test_image_dirty_datetime.tif", + "local_path": "/tmp/dcqc-staged-5m6d8fdj/test_image_dirty_datetime.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "LibTiffInfoTest", + "FileExtensionTest" + ], + "skipped_tests": [], + "status": "AMBER" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "passed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "failed" + } + ] +} From dfbaad33f0f7386136e9b459d15dbc32a7a940b9 Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Tue, 13 Jun 2023 10:30:02 -0600 Subject: [PATCH 13/18] add back multi file testing dict --- tests/test_updaters.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_updaters.py b/tests/test_updaters.py index e268069..b0c3e66 100644 --- a/tests/test_updaters.py +++ b/tests/test_updaters.py @@ -24,7 +24,9 @@ class TestCsvUpdater: mock_dict_multi = { "syn://syn51585496": "GREEN", "syn://syn51585494": "GREEN", + "syn://syn51585494": "RED", "syn://syn51585495": "GREEN", + "syn://syn51585495": "AMBER", } def __init__(self, get_data): From 6bf487e1a964af5260207fff5b6b3f907612d8d3 Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Tue, 13 Jun 2023 11:49:30 -0600 Subject: [PATCH 14/18] updates test_updaters --- src/dcqc/updaters.py | 24 ++++++----- tests/conftest.py | 35 +++++++++++++++- tests/data/empty_input.csv | 1 + tests/test_updaters.py | 83 ++++++++++++++++---------------------- 4 files changed, 83 insertions(+), 60 deletions(-) create mode 100644 tests/data/empty_input.csv diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py index b487916..048cc29 100644 --- a/src/dcqc/updaters.py +++ b/src/dcqc/updaters.py @@ -39,17 +39,19 @@ def update(self, suites: List[SuiteABC]): # Create CSV data structure row_list = [] parser = CsvParser(self.input_path) - for row in parser.list_rows(): - csv_data = row[1] + for _, csv_data in parser.list_rows(): csv_data["dcqc_status"] = collapsed_dict[csv_data["url"]] row_list.append(csv_data) - keys = row_list[0].keys() - # Export updated CSV - self.output_path.parent.mkdir(parents=True, exist_ok=True) - with open( - str(self.output_path), "w+", newline="", encoding="utf-8" - ) as output_file: - dict_writer = DictWriter(output_file, keys) - dict_writer.writeheader() - dict_writer.writerows(row_list) + if row_list: + keys = row_list[0].keys() + # Export updated CSV + self.output_path.parent.mkdir(parents=True, exist_ok=True) + with open( + str(self.output_path), "w+", newline="", encoding="utf-8" + ) as output_file: + dict_writer = DictWriter(output_file, keys) + dict_writer.writeheader() + dict_writer.writerows(row_list) + else: + raise ValueError("No rows found in input CSV") diff --git a/tests/conftest.py b/tests/conftest.py index 6a19a94..d118eb9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,12 +10,13 @@ from datetime import datetime from getpass import getuser from pathlib import Path +from unittest.mock import MagicMock from uuid import uuid4 import pytest from dcqc.file import File -from dcqc.suites.suite_abc import SuiteABC +from dcqc.suites.suite_abc import SuiteABC, SuiteStatus from dcqc.target import SingleTarget CNFPATH = Path(__file__).resolve() @@ -129,3 +130,35 @@ def _get_output(filename: str) -> Path: return output yield _get_output + + +@pytest.fixture +def mocked_suites_single_targets(): + mock_dict_single = { + "syn://syn51585496": SuiteStatus.GREEN, + "syn://syn51585494": SuiteStatus.RED, + "syn://syn51585495": SuiteStatus.AMBER, + } + mocked_suites = [] + for url, status in mock_dict_single.items(): + suite = MagicMock(cls=SuiteABC) + suite.target.files[0].url = url + suite.get_status.return_value = status + mocked_suites.append(suite) + return mocked_suites + + +@pytest.fixture +def mocked_suites_multi_targets(): + mock_dict_multi = { + "syn://syn51585496": SuiteStatus.GREEN, + "syn://syn51585494": SuiteStatus.RED, + "syn://syn51585495": SuiteStatus.AMBER, + } + mocked_suites = [] + for url, status in mock_dict_multi.items(): + suite = MagicMock(cls=SuiteABC) + suite.target.files[0].url = url + suite.get_status.return_value = status + mocked_suites.append(suite) + return mocked_suites diff --git a/tests/data/empty_input.csv b/tests/data/empty_input.csv new file mode 100644 index 0000000..1255d77 --- /dev/null +++ b/tests/data/empty_input.csv @@ -0,0 +1 @@ +url,file_type,md5_checksum diff --git a/tests/test_updaters.py b/tests/test_updaters.py index b0c3e66..6bb2380 100644 --- a/tests/test_updaters.py +++ b/tests/test_updaters.py @@ -1,53 +1,40 @@ import csv -from unittest.mock import MagicMock -from dcqc.suites.suite_abc import SuiteABC +import pytest + from dcqc.updaters import CsvUpdater -def generate_mocked_suites(mock_dict): - mocked_suites = [] - for url, status in mock_dict.items(): - suite = MagicMock(SuiteABC) - suite.target.files[0].url = url - suite.get_status.return_value = status - mocked_suites.append(suite) - return mocked_suites - - -class TestCsvUpdater: - mock_dict_single = { - "syn://syn51585496": "GREEN", - "syn://syn51585494": "RED", - "syn://syn51585495": "AMBER", - } - mock_dict_multi = { - "syn://syn51585496": "GREEN", - "syn://syn51585494": "GREEN", - "syn://syn51585494": "RED", - "syn://syn51585495": "GREEN", - "syn://syn51585495": "AMBER", - } - - def __init__(self, get_data): - self.mocked_suites_single = generate_mocked_suites(self.mock_dict_single) - self.mocked_suites_multi = generate_mocked_suites(self.mock_dict_multi) - self.input_file = get_data("input.csv") - self.output_file = get_data("output.csv") - self.updater = CsvUpdater(self.input_file, self.output_file) - - def get_dcqc_status_list_from_file(self, filename): - with open(filename, "r") as file: - reader = csv.DictReader(file) - status_list = [row["dcqc_status"] for row in reader] - return status_list - - def test_that_csv_updater_updates_csv_as_expected_with_single_targets(self): - self.updater.update(self.mocked_suites_single) - status_list = self.get_dcqc_status_list_from_file(self.output_file) - assert status_list == ["GREEN", "RED", "AMBER"] - - def test_that_csv_updater_updates_csv_as_expected_with_multi_targets(self): - self.updater.update(self.mocked_suites_multi) - status_list = self.get_dcqc_status_list_from_file(self.output_file) - assert status_list == ["GREEN", "RED", "AMBER"] +def get_dcqc_status_list_from_file(filename): + with open(filename, "r") as file: + reader = csv.DictReader(file) + status_list = [row["dcqc_status"] for row in reader] + return status_list + + +def test_that_csv_updater_updates_csv_as_expected_with_single_targets( + get_data, mocked_suites_single_targets +): + input_file = get_data("input.csv") + output_file = get_data("output.csv") + updater = CsvUpdater(input_file, output_file) + updater.update(mocked_suites_single_targets) + status_list = get_dcqc_status_list_from_file(output_file) + assert status_list == ["GREEN", "RED", "AMBER"] + + +def test_that_csv_updater_updates_csv_as_expected_with_multi_targets( + get_data, mocked_suites_multi_targets +): + input_file = get_data("input.csv") + output_file = get_data("output.csv") + updater = CsvUpdater(input_file, output_file) + updater.update(mocked_suites_multi_targets) + status_list = get_dcqc_status_list_from_file(output_file) + assert status_list == ["GREEN", "RED", "AMBER"] + + +def test_that_empty_input_manifest_raises_error(get_data, mocked_suites_single_targets): + with pytest.raises(ValueError): + empty_updater = CsvUpdater(get_data("empty_input.csv"), get_data("output.csv")) + empty_updater.update(mocked_suites_single_targets) From e6d853cbf274feb3e17d2370f9023bb45dfe4bc5 Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Tue, 13 Jun 2023 13:59:45 -0600 Subject: [PATCH 15/18] adds TODO for multi-file --- src/dcqc/updaters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py index 048cc29..a9ad002 100644 --- a/src/dcqc/updaters.py +++ b/src/dcqc/updaters.py @@ -21,6 +21,7 @@ def __init__(self, input_path: Path, output_path: Path): def update(self, suites: List[SuiteABC]): suite_dict = defaultdict(list) # {url: [list_of_statuses]} data structure to allow for multi-file targets + # TODO add support for suites with multiple files in them (multi-file targets) for suite in suites: url = suite.target.files[0].url status = suite.get_status() From c7262f647cf584123d5de5b4611d7bbd9127eeac Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Tue, 13 Jun 2023 14:15:58 -0600 Subject: [PATCH 16/18] comment out multi_target testing --- tests/conftest.py | 28 ++++++++++++++-------------- tests/test_updaters.py | 18 +++++++++--------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index d118eb9..a9bd72e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -148,17 +148,17 @@ def mocked_suites_single_targets(): return mocked_suites -@pytest.fixture -def mocked_suites_multi_targets(): - mock_dict_multi = { - "syn://syn51585496": SuiteStatus.GREEN, - "syn://syn51585494": SuiteStatus.RED, - "syn://syn51585495": SuiteStatus.AMBER, - } - mocked_suites = [] - for url, status in mock_dict_multi.items(): - suite = MagicMock(cls=SuiteABC) - suite.target.files[0].url = url - suite.get_status.return_value = status - mocked_suites.append(suite) - return mocked_suites +# @pytest.fixture +# def mocked_suites_multi_targets(): +# mock_dict_multi = { +# "syn://syn51585496": SuiteStatus.GREEN, +# "syn://syn51585494": SuiteStatus.RED, +# "syn://syn51585495": SuiteStatus.AMBER, +# } +# mocked_suites = [] +# for url, status in mock_dict_multi.items(): +# suite = MagicMock(cls=SuiteABC) +# suite.target.files[0].url = url +# suite.get_status.return_value = status +# mocked_suites.append(suite) +# return mocked_suites diff --git a/tests/test_updaters.py b/tests/test_updaters.py index 6bb2380..9122a50 100644 --- a/tests/test_updaters.py +++ b/tests/test_updaters.py @@ -23,15 +23,15 @@ def test_that_csv_updater_updates_csv_as_expected_with_single_targets( assert status_list == ["GREEN", "RED", "AMBER"] -def test_that_csv_updater_updates_csv_as_expected_with_multi_targets( - get_data, mocked_suites_multi_targets -): - input_file = get_data("input.csv") - output_file = get_data("output.csv") - updater = CsvUpdater(input_file, output_file) - updater.update(mocked_suites_multi_targets) - status_list = get_dcqc_status_list_from_file(output_file) - assert status_list == ["GREEN", "RED", "AMBER"] +# def test_that_csv_updater_updates_csv_as_expected_with_multi_targets( +# get_data, mocked_suites_multi_targets +# ): +# input_file = get_data("input.csv") +# output_file = get_data("output.csv") +# updater = CsvUpdater(input_file, output_file) +# updater.update(mocked_suites_multi_targets) +# status_list = get_dcqc_status_list_from_file(output_file) +# assert status_list == ["GREEN", "RED", "AMBER"] def test_that_empty_input_manifest_raises_error(get_data, mocked_suites_single_targets): From e9dffd0958fcc566af40c6328f858412b22a369d Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Tue, 13 Jun 2023 16:35:47 -0600 Subject: [PATCH 17/18] Removes unneeded None handling in `update` --- src/dcqc/updaters.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py index a9ad002..0ecb9dc 100644 --- a/src/dcqc/updaters.py +++ b/src/dcqc/updaters.py @@ -33,10 +33,8 @@ def update(self, suites: List[SuiteABC]): collapsed_dict[url] = "RED" elif "AMBER" in statuses: collapsed_dict[url] = "AMBER" - elif "GREEN" in statuses: - collapsed_dict[url] = "GREEN" else: - collapsed_dict[url] = "NONE" + collapsed_dict[url] = "GREEN" # Create CSV data structure row_list = [] parser = CsvParser(self.input_path) From c21bc9b242ff6fd02f37d988bd644eb1e1536f02 Mon Sep 17 00:00:00 2001 From: Brad Macdonald Date: Tue, 13 Jun 2023 17:03:29 -0600 Subject: [PATCH 18/18] adds NONE check to updater test --- src/dcqc/updaters.py | 6 ++++-- tests/conftest.py | 1 + tests/data/test_input.csv | 5 +++++ tests/data/{output.csv => test_output.csv} | 1 + tests/test_updaters.py | 20 +++++++++++--------- 5 files changed, 22 insertions(+), 11 deletions(-) create mode 100644 tests/data/test_input.csv rename tests/data/{output.csv => test_output.csv} (77%) diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py index 0ecb9dc..6002643 100644 --- a/src/dcqc/updaters.py +++ b/src/dcqc/updaters.py @@ -21,7 +21,7 @@ def __init__(self, input_path: Path, output_path: Path): def update(self, suites: List[SuiteABC]): suite_dict = defaultdict(list) # {url: [list_of_statuses]} data structure to allow for multi-file targets - # TODO add support for suites with multiple files in them (multi-file targets) + # TODO add support for suites with multiple files in them (multi) for suite in suites: url = suite.target.files[0].url status = suite.get_status() @@ -33,8 +33,10 @@ def update(self, suites: List[SuiteABC]): collapsed_dict[url] = "RED" elif "AMBER" in statuses: collapsed_dict[url] = "AMBER" - else: + elif "GREEN" in statuses: collapsed_dict[url] = "GREEN" + else: + collapsed_dict[url] = "NONE" # Create CSV data structure row_list = [] parser = CsvParser(self.input_path) diff --git a/tests/conftest.py b/tests/conftest.py index a9bd72e..69a6736 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -138,6 +138,7 @@ def mocked_suites_single_targets(): "syn://syn51585496": SuiteStatus.GREEN, "syn://syn51585494": SuiteStatus.RED, "syn://syn51585495": SuiteStatus.AMBER, + "syn://syn51585493": SuiteStatus.NONE, } mocked_suites = [] for url, status in mock_dict_single.items(): diff --git a/tests/data/test_input.csv b/tests/data/test_input.csv new file mode 100644 index 0000000..6501a6a --- /dev/null +++ b/tests/data/test_input.csv @@ -0,0 +1,5 @@ +url,file_type,md5_checksum +syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9 +syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff +syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9 +syn://syn51585493,TIFF,38b86a456d1f441008986c6f798d5ef9 diff --git a/tests/data/output.csv b/tests/data/test_output.csv similarity index 77% rename from tests/data/output.csv rename to tests/data/test_output.csv index ab1167f..e37ee65 100644 --- a/tests/data/output.csv +++ b/tests/data/test_output.csv @@ -2,3 +2,4 @@ url,file_type,md5_checksum,dcqc_status syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9,GREEN syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff,RED syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9,AMBER +syn://syn51585493,TIFF,38b86a456d1f441008986c6f798d5ef9,NONE diff --git a/tests/test_updaters.py b/tests/test_updaters.py index 9122a50..65198fe 100644 --- a/tests/test_updaters.py +++ b/tests/test_updaters.py @@ -15,12 +15,20 @@ def get_dcqc_status_list_from_file(filename): def test_that_csv_updater_updates_csv_as_expected_with_single_targets( get_data, mocked_suites_single_targets ): - input_file = get_data("input.csv") - output_file = get_data("output.csv") + input_file = get_data("test_input.csv") + output_file = get_data("test_output.csv") updater = CsvUpdater(input_file, output_file) updater.update(mocked_suites_single_targets) status_list = get_dcqc_status_list_from_file(output_file) - assert status_list == ["GREEN", "RED", "AMBER"] + assert status_list == ["GREEN", "RED", "AMBER", "NONE"] + + +def test_that_empty_input_manifest_raises_error(get_data, mocked_suites_single_targets): + with pytest.raises(ValueError): + empty_updater = CsvUpdater( + get_data("empty_input.csv"), get_data("test_output.csv") + ) + empty_updater.update(mocked_suites_single_targets) # def test_that_csv_updater_updates_csv_as_expected_with_multi_targets( @@ -32,9 +40,3 @@ def test_that_csv_updater_updates_csv_as_expected_with_single_targets( # updater.update(mocked_suites_multi_targets) # status_list = get_dcqc_status_list_from_file(output_file) # assert status_list == ["GREEN", "RED", "AMBER"] - - -def test_that_empty_input_manifest_raises_error(get_data, mocked_suites_single_targets): - with pytest.raises(ValueError): - empty_updater = CsvUpdater(get_data("empty_input.csv"), get_data("output.csv")) - empty_updater.update(mocked_suites_single_targets)