diff --git a/src/dcqc/main.py b/src/dcqc/main.py
index a9da6b3..72583a8 100644
--- a/src/dcqc/main.py
+++ b/src/dcqc/main.py
@@ -13,6 +13,7 @@
 from dcqc.suites.suite_abc import SuiteABC
 from dcqc.target import SingleTarget
 from dcqc.tests.base_test import BaseTest, ExternalTestMixin
+from dcqc.updaters import CsvUpdater
 
 # Make commands optional to allow for `dcqc --version`
 app = Typer(invoke_without_command=True)
@@ -204,3 +205,15 @@ def qc_file(
     report = JsonReport()
     suite_json = report.generate(suite)
     json.dump(suite_json, sys.stdout, indent=2)
+
+
+@app.command()
+def update_csv(
+    suites_file: Path = input_path_arg,
+    input_file: Path = input_path_arg,
+    output_file: Path = output_path_arg,
+):
+    """Update the input CSV file with a dcqc_status column."""
+    suites = JsonParser.parse_objects(suites_file, SuiteABC)
+    updater = CsvUpdater(input_file, output_file)
+    updater.update(suites)
diff --git a/src/dcqc/suites/suite_abc.py b/src/dcqc/suites/suite_abc.py
index adf3aef..36a715d 100644
--- a/src/dcqc/suites/suite_abc.py
+++ b/src/dcqc/suites/suite_abc.py
@@ -291,3 +291,9 @@ def from_dict(cls, dictionary: SerializedObject) -> SuiteABC:
     def get_base_class(cls):
         """Retrieve base class."""
         return SuiteABC
+
+    def get_status(self) -> SuiteStatus:
+        """Compute (if applicable) and return the suite status."""
+        if self._status == SuiteStatus.NONE:
+            self._status = self.compute_status()
+        return self._status
diff --git a/src/dcqc/updaters.py b/src/dcqc/updaters.py
new file mode 100644
index 0000000..6002643
--- /dev/null
+++ b/src/dcqc/updaters.py
@@ -0,0 +1,58 @@
+from collections import defaultdict
+from csv import DictWriter
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List
+
+from dcqc.parsers import CsvParser
+from dcqc.suites.suite_abc import SuiteABC
+
+
+@dataclass
+class CsvUpdater:
+    input_path: Path
+    output_path: Path
+    parser: CsvParser
+
+    def __init__(self, input_path: Path, output_path: Path):
+        self.output_path = output_path
+        self.input_path = input_path
+
+    def update(self, suites: List[SuiteABC]):
+        suite_dict = defaultdict(list)
+        # {url: [list_of_statuses]} data structure to allow for multi-file targets
+        # TODO: add support for suites with multi-file targets
+        for suite in suites:
+            url = suite.target.files[0].url
+            status = suite.get_status()
+            suite_dict[url].append(status.value)
+        # Collapse the statuses for each URL into one dcqc_status (worst status wins)
+        collapsed_dict = {}
+        for url, statuses in suite_dict.items():
+            if "RED" in statuses:
+                collapsed_dict[url] = "RED"
+            elif "AMBER" in statuses:
+                collapsed_dict[url] = "AMBER"
+            elif "GREEN" in statuses:
+                collapsed_dict[url] = "GREEN"
+            else:
+                collapsed_dict[url] = "NONE"
+        # Create CSV data structure
+        row_list = []
+        parser = CsvParser(self.input_path)
+        for _, csv_data in parser.list_rows():
+            csv_data["dcqc_status"] = collapsed_dict[csv_data["url"]]
+            row_list.append(csv_data)
+
+        if row_list:
+            keys = row_list[0].keys()
+            # Export updated CSV
+            self.output_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(
+                str(self.output_path), "w+", newline="", encoding="utf-8"
+            ) as output_file:
+                dict_writer = DictWriter(output_file, keys)
+                dict_writer.writeheader()
+                dict_writer.writerows(row_list)
+        else:
+            raise ValueError("No rows found in input CSV")
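The heart of CsvUpdater.update is the precedence rule that collapses the per-suite statuses collected for each URL into a single dcqc_status value: RED outranks AMBER, which outranks GREEN, and anything else falls back to NONE. Below is a minimal sketch of that rule in isolation; the collapse_statuses helper is hypothetical and not part of the module above.

from typing import List


def collapse_statuses(statuses: List[str]) -> str:
    # Worst status wins: RED > AMBER > GREEN; an empty list maps to NONE.
    for level in ("RED", "AMBER", "GREEN"):
        if level in statuses:
            return level
    return "NONE"


assert collapse_statuses(["GREEN", "AMBER"]) == "AMBER"
assert collapse_statuses([]) == "NONE"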
diff --git a/tests/conftest.py b/tests/conftest.py
index 6a19a94..69a6736 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,12 +10,13 @@
 from datetime import datetime
 from getpass import getuser
 from pathlib import Path
+from unittest.mock import MagicMock
 from uuid import uuid4
 
 import pytest
 
 from dcqc.file import File
-from dcqc.suites.suite_abc import SuiteABC
+from dcqc.suites.suite_abc import SuiteABC, SuiteStatus
 from dcqc.target import SingleTarget
 
 CNFPATH = Path(__file__).resolve()
@@ -129,3 +130,36 @@ def _get_output(filename: str) -> Path:
         return output
 
     yield _get_output
+
+
+@pytest.fixture
+def mocked_suites_single_targets():
+    mock_dict_single = {
+        "syn://syn51585496": SuiteStatus.GREEN,
+        "syn://syn51585494": SuiteStatus.RED,
+        "syn://syn51585495": SuiteStatus.AMBER,
+        "syn://syn51585493": SuiteStatus.NONE,
+    }
+    mocked_suites = []
+    for url, status in mock_dict_single.items():
+        suite = MagicMock(cls=SuiteABC)
+        suite.target.files[0].url = url
+        suite.get_status.return_value = status
+        mocked_suites.append(suite)
+    return mocked_suites
+
+
+# @pytest.fixture
+# def mocked_suites_multi_targets():
+#     mock_dict_multi = {
+#         "syn://syn51585496": SuiteStatus.GREEN,
+#         "syn://syn51585494": SuiteStatus.RED,
+#         "syn://syn51585495": SuiteStatus.AMBER,
+#     }
+#     mocked_suites = []
+#     for url, status in mock_dict_multi.items():
+#         suite = MagicMock(cls=SuiteABC)
+#         suite.target.files[0].url = url
+#         suite.get_status.return_value = status
+#         mocked_suites.append(suite)
+#     return mocked_suites
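The mocked_suites_single_targets fixture only needs to satisfy the two members that CsvUpdater.update actually touches: suite.target.files[0].url and suite.get_status(). Assigning through the chained lookup works because MagicMock auto-creates child mocks and its __getitem__ returns the same child for every key. A small illustration of that contract outside pytest; the literal values are taken from the fixture, and the final assertion assumes SuiteStatus values are the plain strings written to the CSV.

from unittest.mock import MagicMock

from dcqc.suites.suite_abc import SuiteStatus

# MagicMock builds the target.files[0] chain on the fly, so a url can be assigned directly.
suite = MagicMock()
suite.target.files[0].url = "syn://syn51585496"
suite.get_status.return_value = SuiteStatus.GREEN

# These are the only two lookups CsvUpdater.update performs per suite.
assert suite.target.files[0].url == "syn://syn51585496"
assert suite.get_status().value == "GREEN"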
"passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "passed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + } + ] + }, + { + "type": "TiffSuite", + "target": { + "id": "0002", + "files": [ + { + "url": "syn://syn51585494", + "metadata": { + "md5_checksum": "9cee1b0e8c4d051fabea82b62ae69404" + }, + "type": "TIFF", + "name": "test_contains_word_date.tif", + "local_path": "/tmp/dcqc-staged-ddxo9fx2/test_contains_word_date.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "FileExtensionTest", + "LibTiffInfoTest" + ], + "skipped_tests": [], + "status": "RED" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "failed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "failed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + } + ] + }, + { + "type": "TiffSuite", + "target": { + "id": "0003", + "files": [ + { + "url": "syn://syn51585495", + "metadata": { + "md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268" + }, + "type": "TIFF", + "name": "test_image_dirty_datetime.tif", + "local_path": "/tmp/dcqc-staged-5m6d8fdj/test_image_dirty_datetime.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "FileExtensionTest", + "LibTiffInfoTest" + ], + "skipped_tests": [], + "status": "AMBER" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "passed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "failed" + } + ] + } +] diff --git a/tests/data/suites_files/suites_1.json b/tests/data/suites_files/suites_1.json new file mode 100644 index 0000000..96e0607 --- /dev/null +++ b/tests/data/suites_files/suites_1.json @@ -0,0 +1,59 @@ +{ + "type": "TiffSuite", + "target": { + "id": "0001", + "files": [ + { + "url": "syn://syn51585496", + "metadata": { + "md5_checksum": "c7b08f6decb5e7572efbe6074926a843" + }, + "type": "TIFF", + "name": "circuit.tif", + "local_path": "/tmp/dcqc-staged-7onezxv1/circuit.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "LibTiffInfoTest", + "FileExtensionTest" + ], + "skipped_tests": [], + "status": "GREEN" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "passed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + 
"is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + } + ] +} diff --git a/tests/data/suites_files/suites_2.json b/tests/data/suites_files/suites_2.json new file mode 100644 index 0000000..1cc45bc --- /dev/null +++ b/tests/data/suites_files/suites_2.json @@ -0,0 +1,59 @@ +{ + "type": "TiffSuite", + "target": { + "id": "0002", + "files": [ + { + "url": "syn://syn51585494", + "metadata": { + "md5_checksum": "9cee1b0e8c4d051fabea82b62ae69404" + }, + "type": "TIFF", + "name": "test_contains_word_date.tif", + "local_path": "/tmp/dcqc-staged-ddxo9fx2/test_contains_word_date.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "LibTiffInfoTest", + "FileExtensionTest" + ], + "skipped_tests": [], + "status": "RED" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "failed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "failed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + } + ] +} diff --git a/tests/data/suites_files/suites_3.json b/tests/data/suites_files/suites_3.json new file mode 100644 index 0000000..80eca4a --- /dev/null +++ b/tests/data/suites_files/suites_3.json @@ -0,0 +1,59 @@ +{ + "type": "TiffSuite", + "target": { + "id": "0003", + "files": [ + { + "url": "syn://syn51585495", + "metadata": { + "md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268" + }, + "type": "TIFF", + "name": "test_image_dirty_datetime.tif", + "local_path": "/tmp/dcqc-staged-5m6d8fdj/test_image_dirty_datetime.tif" + } + ], + "type": "SingleTarget" + }, + "suite_status": { + "required_tests": [ + "Md5ChecksumTest", + "LibTiffInfoTest", + "FileExtensionTest" + ], + "skipped_tests": [], + "status": "AMBER" + }, + "tests": [ + { + "type": "FileExtensionTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "GrepDateTest", + "tier": 4, + "is_external_test": true, + "status": "passed" + }, + { + "type": "LibTiffInfoTest", + "tier": 2, + "is_external_test": true, + "status": "passed" + }, + { + "type": "Md5ChecksumTest", + "tier": 1, + "is_external_test": false, + "status": "passed" + }, + { + "type": "TiffTag306DateTimeTest", + "tier": 4, + "is_external_test": true, + "status": "failed" + } + ] +} diff --git a/tests/data/test_input.csv b/tests/data/test_input.csv new file mode 100644 index 0000000..6501a6a --- /dev/null +++ b/tests/data/test_input.csv @@ -0,0 +1,5 @@ +url,file_type,md5_checksum +syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9 +syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff +syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9 +syn://syn51585493,TIFF,38b86a456d1f441008986c6f798d5ef9 diff --git a/tests/data/test_output.csv b/tests/data/test_output.csv new file mode 100644 index 0000000..e37ee65 --- /dev/null +++ b/tests/data/test_output.csv @@ -0,0 +1,5 @@ +url,file_type,md5_checksum,dcqc_status +syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9,GREEN +syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff,RED +syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9,AMBER +syn://syn51585493,TIFF,38b86a456d1f441008986c6f798d5ef9,NONE diff --git 
diff --git a/tests/test_main.py b/tests/test_main.py
index 04218b4..62e90fe 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -134,3 +134,20 @@ def test_qc_file(get_data):
     ]
     result = run_command(args)
     check_command_result(result)
+
+
+def test_update_csv(get_data, get_output):
+    suites_path = get_data("suites.json")
+    input_path = get_data("input.csv")
+    output_path = get_output("update_csv") / "output.csv"
+    output_path.unlink(missing_ok=True)
+
+    args = [
+        "update-csv",
+        suites_path,
+        input_path,
+        output_path,
+    ]
+    result = run_command(args)
+    check_command_result(result)
+    assert output_path.exists()
diff --git a/tests/test_suites.py b/tests/test_suites.py
index 46b4a59..9f96a5c 100644
--- a/tests/test_suites.py
+++ b/tests/test_suites.py
@@ -1,3 +1,5 @@
+from unittest.mock import patch
+
 import pytest
 
 from dcqc.file import FileType
@@ -124,3 +126,16 @@ def test_that_a_suite_will_consider_required_tests_when_passing(test_targets):
     suite = SuiteABC.from_target(target, required_tests)
     suite_status = suite.compute_status()
     assert suite_status == SuiteStatus.GREEN
+
+
+def test_that_status_is_computed_if_not_already_assigned(test_targets):
+    with patch.object(
+        SuiteABC, "compute_status", return_value=SuiteStatus.GREEN
+    ) as patch_compute_status:
+        target = test_targets["good"]
+        required_tests = ["Md5ChecksumTest"]
+        suite = SuiteABC.from_target(target, required_tests)
+        suite._status = SuiteStatus.NONE
+        suite_status = suite.get_status()
+        assert suite_status == SuiteStatus.GREEN
+        patch_compute_status.assert_called_once()
diff --git a/tests/test_updaters.py b/tests/test_updaters.py
new file mode 100644
index 0000000..65198fe
--- /dev/null
+++ b/tests/test_updaters.py
@@ -0,0 +1,42 @@
+import csv
+
+import pytest
+
+from dcqc.updaters import CsvUpdater
+
+
+def get_dcqc_status_list_from_file(filename):
+    with open(filename, "r") as file:
+        reader = csv.DictReader(file)
+        status_list = [row["dcqc_status"] for row in reader]
+    return status_list
+
+
+def test_that_csv_updater_updates_csv_as_expected_with_single_targets(
+    get_data, mocked_suites_single_targets
+):
+    input_file = get_data("test_input.csv")
+    output_file = get_data("test_output.csv")
+    updater = CsvUpdater(input_file, output_file)
+    updater.update(mocked_suites_single_targets)
+    status_list = get_dcqc_status_list_from_file(output_file)
+    assert status_list == ["GREEN", "RED", "AMBER", "NONE"]
+
+
+def test_that_empty_input_manifest_raises_error(get_data, mocked_suites_single_targets):
+    with pytest.raises(ValueError):
+        empty_updater = CsvUpdater(
+            get_data("empty_input.csv"), get_data("test_output.csv")
+        )
+        empty_updater.update(mocked_suites_single_targets)
+
+
+# def test_that_csv_updater_updates_csv_as_expected_with_multi_targets(
+#     get_data, mocked_suites_multi_targets
+# ):
+#     input_file = get_data("input.csv")
+#     output_file = get_data("output.csv")
+#     updater = CsvUpdater(input_file, output_file)
+#     updater.update(mocked_suites_multi_targets)
+#     status_list = get_dcqc_status_list_from_file(output_file)
+#     assert status_list == ["GREEN", "RED", "AMBER"]
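The update-csv command added in src/dcqc/main.py can also be exercised directly against the Typer app, mirroring what test_update_csv does through the run_command helper. The sketch below uses typer.testing.CliRunner; the path literals are placeholders rather than fixtures from the test suite, and a zero exit code assumes the referenced files exist.

from pathlib import Path

from typer.testing import CliRunner

from dcqc.main import app

runner = CliRunner()
result = runner.invoke(
    app,
    [
        "update-csv",
        "tests/data/suites.json",  # serialized suites (suites_file)
        "tests/data/input.csv",  # manifest to annotate (input_file)
        "output.csv",  # destination for the updated manifest (output_file)
    ],
)
assert result.exit_code == 0
assert Path("output.csv").exists()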