Commit

Merge pull request #39 from Sage-Bionetworks-Workflows/bwmac/orca-175/add_csv_updater

[ORCA-175] Add `CsvUpdater` and `update_csv` command
BWMac authored Jun 14, 2023
2 parents a46b491 + c21bc9b commit b9d016f
Showing 16 changed files with 570 additions and 1 deletion.
13 changes: 13 additions & 0 deletions src/dcqc/main.py
@@ -13,6 +13,7 @@
from dcqc.suites.suite_abc import SuiteABC
from dcqc.target import SingleTarget
from dcqc.tests.base_test import BaseTest, ExternalTestMixin
from dcqc.updaters import CsvUpdater

# Make commands optional to allow for `dcqc --version`
app = Typer(invoke_without_command=True)
@@ -204,3 +205,15 @@ def qc_file(
    report = JsonReport()
    suite_json = report.generate(suite)
    json.dump(suite_json, sys.stdout, indent=2)


@app.command()
def update_csv(
    suites_file: Path = input_path_arg,
    input_file: Path = input_path_arg,
    output_file: Path = output_path_arg,
):
    """Update the input CSV file with a dcqc_status column."""
    suites = JsonParser.parse_objects(suites_file, SuiteABC)
    updater = CsvUpdater(input_file, output_file)
    updater.update(suites)
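
The new subcommand wires JsonParser, SuiteABC, and CsvUpdater together. As a rough sketch of how it could be exercised end to end (the file names are placeholders, and note that Typer typically exposes the function under the hyphenated name update-csv):

from typer.testing import CliRunner

from dcqc.main import app

runner = CliRunner()
# suites.json, input.csv, and output.csv are hypothetical paths
result = runner.invoke(app, ["update-csv", "suites.json", "input.csv", "output.csv"])
assert result.exit_code == 0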
6 changes: 6 additions & 0 deletions src/dcqc/suites/suite_abc.py
@@ -291,3 +291,9 @@ def from_dict(cls, dictionary: SerializedObject) -> SuiteABC:
    def get_base_class(cls):
        """Retrieve base class."""
        return SuiteABC

    def get_status(self) -> SuiteStatus:
        """Compute (if applicable) and return the suite status."""
        if self._status == SuiteStatus.NONE:
            self._status = self.compute_status()
        return self._status
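
get_status memoizes the result: the first call runs compute_status() and caches it, and later calls return the cached value. A minimal sketch of the call pattern (suite construction abbreviated, test lists assumed):

suite = SuiteABC.from_tests(suite_tests, required_tests, skipped_tests)
first = suite.get_status()   # computes and caches the status
second = suite.get_status()  # served from the cache, no recomputation
assert first == second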
58 changes: 58 additions & 0 deletions src/dcqc/updaters.py
@@ -0,0 +1,58 @@
from collections import defaultdict
from csv import DictWriter
from dataclasses import dataclass
from pathlib import Path
from typing import List

from dcqc.parsers import CsvParser
from dcqc.suites.suite_abc import SuiteABC


@dataclass
class CsvUpdater:
    input_path: Path
    output_path: Path

    def update(self, suites: List[SuiteABC]) -> None:
        # Map each URL to its list of statuses to allow for multi-file targets
        # TODO: add support for suites with multiple files per target
        suite_dict = defaultdict(list)
        for suite in suites:
            url = suite.target.files[0].url
            status = suite.get_status()
            suite_dict[url].append(status.value)
        # Collapse the statuses for each URL, worst status first
        collapsed_dict = {}
        for url, statuses in suite_dict.items():
            if "RED" in statuses:
                collapsed_dict[url] = "RED"
            elif "AMBER" in statuses:
                collapsed_dict[url] = "AMBER"
            elif "GREEN" in statuses:
                collapsed_dict[url] = "GREEN"
            else:
                collapsed_dict[url] = "NONE"
        # Append the collapsed status to each input row
        row_list = []
        parser = CsvParser(self.input_path)
        for _, csv_data in parser.list_rows():
            csv_data["dcqc_status"] = collapsed_dict[csv_data["url"]]
            row_list.append(csv_data)

        if row_list:
            keys = row_list[0].keys()
            # Export the updated CSV
            self.output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(
                str(self.output_path), "w+", newline="", encoding="utf-8"
            ) as output_file:
                dict_writer = DictWriter(output_file, keys)
                dict_writer.writeheader()
                dict_writer.writerows(row_list)
        else:
            raise ValueError("No rows found in input CSV")
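
Statuses are collapsed per URL with the worst outcome winning (RED over AMBER over GREEN, with NONE only when no status was computed). A library-level usage sketch mirroring what the update_csv command does (the paths are placeholders):

from pathlib import Path

from dcqc.parsers import JsonParser
from dcqc.suites.suite_abc import SuiteABC
from dcqc.updaters import CsvUpdater

suites = JsonParser.parse_objects(Path("suites.json"), SuiteABC)
updater = CsvUpdater(Path("input.csv"), Path("output.csv"))
updater.update(suites)  # writes the input rows plus a dcqc_status column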
36 changes: 35 additions & 1 deletion tests/conftest.py
@@ -10,12 +10,13 @@
from datetime import datetime
from getpass import getuser
from pathlib import Path
from unittest.mock import MagicMock
from uuid import uuid4

import pytest

from dcqc.file import File
from dcqc.suites.suite_abc import SuiteABC
from dcqc.suites.suite_abc import SuiteABC, SuiteStatus
from dcqc.target import SingleTarget

CNFPATH = Path(__file__).resolve()
@@ -129,3 +130,36 @@ def _get_output(filename: str) -> Path:
        return output

    yield _get_output


@pytest.fixture
def mocked_suites_single_targets():
    mock_dict_single = {
        "syn://syn51585496": SuiteStatus.GREEN,
        "syn://syn51585494": SuiteStatus.RED,
        "syn://syn51585495": SuiteStatus.AMBER,
        "syn://syn51585493": SuiteStatus.NONE,
    }
    mocked_suites = []
    for url, status in mock_dict_single.items():
        suite = MagicMock(cls=SuiteABC)
        suite.target.files[0].url = url
        suite.get_status.return_value = status
        mocked_suites.append(suite)
    return mocked_suites


# @pytest.fixture
# def mocked_suites_multi_targets():
#     mock_dict_multi = {
#         "syn://syn51585496": SuiteStatus.GREEN,
#         "syn://syn51585494": SuiteStatus.RED,
#         "syn://syn51585495": SuiteStatus.AMBER,
#     }
#     mocked_suites = []
#     for url, status in mock_dict_multi.items():
#         suite = MagicMock(cls=SuiteABC)
#         suite.target.files[0].url = url
#         suite.get_status.return_value = status
#         mocked_suites.append(suite)
#     return mocked_suites
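
A test consuming this fixture might look like the following sketch (the test name, use of pytest's tmp_path, and the assertion are illustrative, not part of this commit):

from pathlib import Path

from dcqc.updaters import CsvUpdater

def test_update_csv_with_mocked_suites(mocked_suites_single_targets, tmp_path):
    output_path = tmp_path / "output.csv"
    updater = CsvUpdater(Path("tests/data/input.csv"), output_path)
    updater.update(mocked_suites_single_targets)
    assert output_path.exists()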
1 change: 1 addition & 0 deletions tests/data/empty_input.csv
@@ -0,0 +1 @@
url,file_type,md5_checksum
13 changes: 13 additions & 0 deletions tests/data/generate.py
@@ -11,6 +11,7 @@
from dcqc import tests
from dcqc.file import File
from dcqc.mixins import SerializableMixin
from dcqc.parsers import JsonParser
from dcqc.reports import JsonReport
from dcqc.suites.suite_abc import SuiteABC
from dcqc.target import SingleTarget
@@ -60,3 +61,15 @@ def export(obj: SerializableMixin | Sequence[SerializableMixin], filename: str):
skipped_tests = ["LibTiffInfoTest"]
suite = SuiteABC.from_tests(suite_tests, required_tests, skipped_tests)
export(suite, "suite.json")

# suites.json
input_jsons = [
    Path(file_path)
    for file_path in [
        "tests/data/suites_files/suites_1.json",
        "tests/data/suites_files/suites_2.json",
        "tests/data/suites_files/suites_3.json",
    ]
]
suites = [JsonParser.parse_object(json_, SuiteABC) for json_ in input_jsons]
export(suites, "suites.json")
4 changes: 4 additions & 0 deletions tests/data/input.csv
@@ -0,0 +1,4 @@
url,file_type,md5_checksum
syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9
syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff
syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9
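
Given the suite statuses recorded in suites.json below (GREEN, RED, and AMBER for these three URLs), the CSV written by update_csv should look roughly like this (a sketch of the expected output, not a file in this commit):

url,file_type,md5_checksum,dcqc_status
syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9,GREEN
syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff,RED
syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9,AMBER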
179 changes: 179 additions & 0 deletions tests/data/suites.json
@@ -0,0 +1,179 @@
[
{
"type": "TiffSuite",
"target": {
"id": "0001",
"files": [
{
"url": "syn://syn51585496",
"metadata": {
"md5_checksum": "c7b08f6decb5e7572efbe6074926a843"
},
"type": "TIFF",
"name": "circuit.tif",
"local_path": "/tmp/dcqc-staged-7onezxv1/circuit.tif"
}
],
"type": "SingleTarget"
},
"suite_status": {
"required_tests": [
"Md5ChecksumTest",
"FileExtensionTest",
"LibTiffInfoTest"
],
"skipped_tests": [],
"status": "GREEN"
},
"tests": [
{
"type": "FileExtensionTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "GrepDateTest",
"tier": 4,
"is_external_test": true,
"status": "passed"
},
{
"type": "LibTiffInfoTest",
"tier": 2,
"is_external_test": true,
"status": "passed"
},
{
"type": "Md5ChecksumTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "TiffTag306DateTimeTest",
"tier": 4,
"is_external_test": true,
"status": "passed"
}
]
},
{
"type": "TiffSuite",
"target": {
"id": "0002",
"files": [
{
"url": "syn://syn51585494",
"metadata": {
"md5_checksum": "9cee1b0e8c4d051fabea82b62ae69404"
},
"type": "TIFF",
"name": "test_contains_word_date.tif",
"local_path": "/tmp/dcqc-staged-ddxo9fx2/test_contains_word_date.tif"
}
],
"type": "SingleTarget"
},
"suite_status": {
"required_tests": [
"Md5ChecksumTest",
"FileExtensionTest",
"LibTiffInfoTest"
],
"skipped_tests": [],
"status": "RED"
},
"tests": [
{
"type": "FileExtensionTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "GrepDateTest",
"tier": 4,
"is_external_test": true,
"status": "failed"
},
{
"type": "LibTiffInfoTest",
"tier": 2,
"is_external_test": true,
"status": "failed"
},
{
"type": "Md5ChecksumTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "TiffTag306DateTimeTest",
"tier": 4,
"is_external_test": true,
"status": "passed"
}
]
},
{
"type": "TiffSuite",
"target": {
"id": "0003",
"files": [
{
"url": "syn://syn51585495",
"metadata": {
"md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268"
},
"type": "TIFF",
"name": "test_image_dirty_datetime.tif",
"local_path": "/tmp/dcqc-staged-5m6d8fdj/test_image_dirty_datetime.tif"
}
],
"type": "SingleTarget"
},
"suite_status": {
"required_tests": [
"Md5ChecksumTest",
"FileExtensionTest",
"LibTiffInfoTest"
],
"skipped_tests": [],
"status": "AMBER"
},
"tests": [
{
"type": "FileExtensionTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "GrepDateTest",
"tier": 4,
"is_external_test": true,
"status": "passed"
},
{
"type": "LibTiffInfoTest",
"tier": 2,
"is_external_test": true,
"status": "passed"
},
{
"type": "Md5ChecksumTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "TiffTag306DateTimeTest",
"tier": 4,
"is_external_test": true,
"status": "failed"
}
]
}
]