diff --git a/CHANGELOG.md b/CHANGELOG.md
index ba6e71d..47dedfb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ### Added
 
+ - CLI command for generating QC report for CDM
+
 ### Fixed
 
 ### Changed
diff --git a/prp/cli.py b/prp/cli.py
index 381456d..8d1100a 100644
--- a/prp/cli.py
+++ b/prp/cli.py
@@ -4,11 +4,11 @@
 from typing import List
 
 import click
-from pydantic import ValidationError
+from pydantic import TypeAdapter, ValidationError
 
 from .models.metadata import SoupType, SoupVersion
 from .models.phenotype import ElementType
-from .models.qc import QcMethodIndex
+from .models.qc import QcMethodIndex, QcSoftware
 from .models.sample import MethodIndex, PipelineResult
 from .models.typing import TypingMethod
 from .parse import (
@@ -85,8 +85,8 @@ def cli():
 @click.option("-k", "--mykrobe", type=click.File(), help="mykrobe results")
 @click.option("-t", "--tbprofiler", type=click.File(), help="tbprofiler results")
 @click.option("--correct_alleles", is_flag=True, help="Correct alleles")
-@click.argument("output", type=click.File("w"))
-def create_output(
+@click.option("-o", "--output", required=True, type=click.File("w"), help="output filepath")
+def create_bonsai_input(
     sample_id,
     run_metadata,
     quast,
@@ -247,7 +247,7 @@ def print_schema():
 
 
 @cli.command()
-@click.argument("output", type=click.File("r"))
+@click.option("-o", "--output", required=True, type=click.File("r"))
 def validate(output):
     """Validate output format of result json file."""
     js = json.load(output)
@@ -258,3 +258,38 @@ def validate(output):
         click.secho(err)
     else:
         click.secho(f'The file "{output.name}" is valid', fg="green")
+
+
+@cli.command()
+@click.option("-q", "--quast", type=click.File(), help="Quast quality control metrics")
+@click.option("-p", "--quality", type=click.File(), help="postalignqc qc results")
+@click.option("-c", "--cgmlst", type=click.File(), help="cgMLST prediction results")
+@click.option("--correct_alleles", is_flag=True, help="Correct alleles")
+@click.option("-o", "--output", required=True, type=click.File("w"), help="output filepath")
+def create_cdm_input(quast, quality, cgmlst, correct_alleles, output) -> None:
+    """Format QC metrics into CDM compatible input file."""
+    results: List[QcMethodIndex] = []
+    if quality:
+        LOG.info("Parse quality results")
+        postalignqc_res: QcMethodIndex = parse_postalignqc_results(quality)
+        results.append(postalignqc_res)
+
+    if quast:
+        LOG.info("Parse quast results")
+        quast_res: QcMethodIndex = parse_quast_results(quast)
+        results.append(quast_res)
+
+    if cgmlst:
+        LOG.info("Parse cgmlst results")
+        cgmlst_res: MethodIndex = parse_cgmlst_results(cgmlst, correct_alleles=correct_alleles)
+        # only the number of missing loci is reported as a QC metric
+        n_missing_loci = QcMethodIndex(
+            software=QcSoftware.CHEWBBACA, result={"n_missing": cgmlst_res.result.n_missing}
+        )
+        results.append(n_missing_loci)
+    # cast output as pydantic type for easy serialization
+    qc_data = TypeAdapter(List[QcMethodIndex])
+
+    LOG.info("Storing results to: %s", output.name)
+    output.write(qc_data.dump_json(results, indent=3).decode("utf-8"))
+    click.secho("Finished generating QC output", fg="green")
diff --git a/prp/models/phenotype.py b/prp/models/phenotype.py
index 646d5de..85d286c 100644
--- a/prp/models/phenotype.py
+++ b/prp/models/phenotype.py
@@ -110,8 +110,10 @@ class GeneBase(BaseModel):
     )
     close_seq_name: Optional[str] = Field(
         default=None,
-        description=("Name of the closest competing hit if there "
-                     "are multiple equaly good hits"),
+        description=(
+            "Name of the closest competing hit if there "
+            "are multiple equally good hits"
+        ),
     )
 
 
diff --git a/prp/models/qc.py b/prp/models/qc.py
index 73d3941..232f4fa 100644
--- a/prp/models/qc.py
+++ b/prp/models/qc.py
@@ -2,9 +2,10 @@
 from enum import Enum
 from typing import Dict
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from .base import RWModel
+from .typing import TypingSoftware
 
 
 class QcSoftware(Enum):
@@ -13,6 +14,7 @@ class QcSoftware(Enum):
     QUAST = "quast"
     FASTQC = "fastqc"
     POSTALIGNQC = "postalignqc"
+    CHEWBBACA = TypingSoftware.CHEWBBACA.value
 
 
 class QuastQcResult(BaseModel):
@@ -42,6 +44,12 @@ class PostAlignQcResult(BaseModel):
     dup_reads: int
 
 
+class GenomeCompleteness(BaseModel):
+    """Genome completeness QC metrics derived from cgMLST typing."""
+
+    n_missing: int = Field(..., description="Number of missing cgMLST alleles")
+
+
 class QcMethodIndex(RWModel):
     """QC results container.
 
@@ -51,4 +59,4 @@ class QcMethodIndex(RWModel):
 
     software: QcSoftware
     version: str | None = None
-    result: QuastQcResult | PostAlignQcResult
+    result: QuastQcResult | PostAlignQcResult | GenomeCompleteness
diff --git a/prp/models/typing.py b/prp/models/typing.py
index 0bdbffc..e2ec3e2 100644
--- a/prp/models/typing.py
+++ b/prp/models/typing.py
@@ -1,7 +1,7 @@
 """Typing related data models"""
 
 from enum import Enum
-from typing import Dict, List, Optional, Union, Any
+from typing import Any, Dict, List, Optional, Union
 
 from pydantic import Field
 
diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py
index 91802cc..d21d6d6 100644
--- a/prp/parse/phenotype/mykrobe.py
+++ b/prp/parse/phenotype/mykrobe.py
@@ -3,7 +3,7 @@
 import re
 from typing import Any, Dict, Tuple
 
-from ...models.phenotype import ElementTypeResult, ElementType, ElementAmrSubtype
+from ...models.phenotype import ElementAmrSubtype, ElementType, ElementTypeResult
 from ...models.phenotype import PredictionSoftware as Software
 from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType
 from ...models.sample import MethodIndex
diff --git a/tests/__init__.py b/tests/__init__.py
index 1b72238..ad859c9 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1 +1 @@
-"""PRP unit tests."""
\ No newline at end of file
+"""PRP unit tests."""
diff --git a/tests/fixtures/ecoli/__init__.py b/tests/fixtures/ecoli/__init__.py
index 6e6c40f..4d366ad 100644
--- a/tests/fixtures/ecoli/__init__.py
+++ b/tests/fixtures/ecoli/__init__.py
@@ -1,6 +1,8 @@
-"""Ecoli input data fixutres."""
+"""Ecoli input data fixtures."""
 
+import json
+
 import pytest
 
 from ..fixtures import data_path
 
@@ -58,3 +60,10 @@ def ecoli_chewbbaca_path(data_path):
 @pytest.fixture()
 def ecoli_bracken_path(data_path):
     return str(data_path.joinpath("ecoli", "bracken.out"))
+
+
+@pytest.fixture()
+def ecoli_cdm_input(data_path):
+    path = data_path.joinpath("ecoli", "cdm_input.json")
+    with open(path, encoding="utf-8") as inpt:
+        return json.load(inpt)
diff --git a/tests/fixtures/ecoli/cdm_input.json b/tests/fixtures/ecoli/cdm_input.json
new file mode 100644
index 0000000..9f4067a
--- /dev/null
+++ b/tests/fixtures/ecoli/cdm_input.json
@@ -0,0 +1,46 @@
+[
+   {
+      "software": "postalignqc",
+      "version": null,
+      "result": {
+         "ins_size": 375,
+         "ins_size_dev": 328,
+         "mean_cov": 210,
+         "pct_above_x": {
+            "1": 99.8908335372424,
+            "500": 0.0,
+            "250": 8.28531811957226,
+            "10": 99.8664503084393,
+            "1000": 0.0,
+            "30": 99.8493219487695,
+            "100": 99.7996110703293
+         },
+         "mapped_reads": 6078594,
+         "tot_reads": 7579030,
+         "iqr_median": 0.19047619047619,
+         "dup_pct": 0.0,
+         "dup_reads": 0
+      }
+   },
+   {
+      "software": "quast",
+      "version": null,
+      "result": {
+         "total_length": 5103744,
+         "reference_length": 4641652,
+         "largest_contig": 359420,
+         "n_contigs": 109,
+         "n50": 173071,
+         "assembly_gc": 50.81,
+         "reference_gc": 50.79,
+         "duplication_ratio": 1.001
+      }
+   },
+   {
+      "software": "chewbbaca",
+      "version": null,
+      "result": {
+         "n_missing": 4228
+      }
+   }
+]
diff --git a/tests/test_cli.py b/tests/test_cli.py
index ae89352..3cb1002 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,8 +1,9 @@
 """Test PRP cli functions."""
+import json
 
 from click.testing import CliRunner
 
-from prp.cli import create_output, print_schema
+from prp.cli import create_bonsai_input, create_cdm_input, print_schema
 
 
 def test_create_output_saureus(
@@ -26,7 +27,7 @@ def test_create_output_saureus(
     runner = CliRunner()
     with runner.isolated_filesystem():
         result = runner.invoke(
-            create_output,
+            create_bonsai_input,
             [
                 "-i",
                 sample_id,
@@ -50,6 +51,7 @@ def test_create_output_saureus(
                 saureus_mlst_path,
                 "--cgmlst",
                 saureus_chewbbaca_path,
+                "--output",
                 output_file,
             ],
         )
@@ -77,7 +79,7 @@ def test_create_output_ecoli(
     runner = CliRunner()
     with runner.isolated_filesystem():
         result = runner.invoke(
-            create_output,
+            create_bonsai_input,
             [
                 "-i",
                 sample_id,
@@ -101,15 +103,44 @@ def test_create_output_ecoli(
                 ecoli_mlst_path,
                 "--cgmlst",
                 ecoli_chewbbaca_path,
+                "--output",
                 output_file,
             ],
         )
         assert result.exit_code == 0
 
 
 def test_print_schema_cmd():
     """Test print schema command."""
     runner = CliRunner()
     with runner.isolated_filesystem():
         result = runner.invoke(print_schema)
         assert result.exit_code == 0
+
+
+def test_cdm_input_cmd(ecoli_quast_path, ecoli_bwa_path, ecoli_chewbbaca_path, ecoli_cdm_input):
+    """Test command for creating CDM input."""
+    runner = CliRunner()
+    with runner.isolated_filesystem():
+        output_fname = "test_output"
+        result = runner.invoke(
+            create_cdm_input,
+            [
+                "--quast",
+                ecoli_quast_path,
+                "--quality",
+                ecoli_bwa_path,
+                "--cgmlst",
+                ecoli_chewbbaca_path,
+                "--output",
+                output_fname,
+            ],
+        )
+
+        # test successful execution of command
+        assert result.exit_code == 0
+
+        # test correct output format
+        with open(output_fname, encoding="utf-8") as inpt:
+            cmd_output = json.load(inpt)
+        assert cmd_output == ecoli_cdm_input