Skip to content

Commit

Permalink
Merge pull request #4 from Clinical-Genomics-Lund/3-args-for-cdm-qc-o…
Browse files Browse the repository at this point in the history
…utput-json-filepath-and-analysis-results

Added CLI command for generating a QC report for CDM
  • Loading branch information
mhkc authored Dec 12, 2023
2 parents a33d80e + 32dc055 commit 1de5ffb
Show file tree
Hide file tree
Showing 10 changed files with 141 additions and 18 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

### Added

- CLI command for generating QC report for CDM

### Fixed

### Changed
Expand Down
44 changes: 39 additions & 5 deletions prp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from typing import List

import click
from pydantic import ValidationError
from pydantic import TypeAdapter, ValidationError

from .models.metadata import SoupType, SoupVersion
from .models.phenotype import ElementType
from .models.qc import QcMethodIndex
from .models.qc import QcMethodIndex, QcSoftware
from .models.sample import MethodIndex, PipelineResult
from .models.typing import TypingMethod
from .parse import (
Expand Down Expand Up @@ -85,8 +85,8 @@ def cli():
@click.option("-k", "--mykrobe", type=click.File(), help="mykrobe results")
@click.option("-t", "--tbprofiler", type=click.File(), help="tbprofiler results")
@click.option("--correct_alleles", is_flag=True, help="Correct alleles")
@click.argument("output", type=click.File("w"))
def create_output(
@click.option("-o", "--output", required=True, type=click.File("w"), help="output filepath")
def create_bonsai_input(
sample_id,
run_metadata,
quast,
Expand Down Expand Up @@ -247,7 +247,7 @@ def print_schema():


@cli.command()
@click.argument("output", type=click.File("r"))
@click.option("-o", "--output", required=True, type=click.File("r"))
def validate(output):
"""Validate output format of result json file."""
js = json.load(output)
Expand All @@ -258,3 +258,37 @@ def validate(output):
click.secho(err)
else:
click.secho(f'The file "{output.name}" is valid', fg="green")


@cli.command()
@click.option("-q", "--quast", type=click.File(), help="Quast quality control metrics")
@click.option("-p", "--quality", type=click.File(), help="postalignqc qc results")
@click.option("-c", "--cgmlst", type=click.File(), help="cgMLST prediction results")
@click.option("--correct_alleles", is_flag=True, help="Correct alleles")
@click.option("-o", "--output", required=True, type=click.File("w"), help="output filepath")
def create_cdm_input(quast, quality, cgmlst, correct_alleles, output) -> None:
    """Format QC metrics into CDM compatible input file."""
    # NOTE: the docstring above doubles as the command's --help text, so it is
    # kept short; the implementation notes live in comments instead.
    #
    # Every input file is optional. Only the ones supplied are parsed, and the
    # entries are written in a fixed order: postalignqc, quast, chewbbaca.
    qc_entries = []

    if quality:
        LOG.info("Parse quality results")
        qc_entries.append(parse_postalignqc_results(quality))

    if quast:
        LOG.info("Parse quast results")
        qc_entries.append(parse_quast_results(quast))

    if cgmlst:
        LOG.info("Parse cgmlst results")
        typing_result = parse_cgmlst_results(cgmlst, correct_alleles=correct_alleles)
        # only the number of missing alleles is carried over as a QC metric
        qc_entries.append(
            QcMethodIndex(
                software=QcSoftware.CHEWBBACA,
                result={"n_missing": typing_result.result.n_missing},
            )
        )

    # a TypeAdapter serializes the plain list of pydantic models in one call
    serializer = TypeAdapter(List[QcMethodIndex])

    LOG.info("Storing results to: %s", output.name)
    json_bytes = serializer.dump_json(qc_entries, indent=3)
    output.write(json_bytes.decode("utf-8"))
    click.secho("Finished generating QC output", fg="green")
6 changes: 4 additions & 2 deletions prp/models/phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,10 @@ class GeneBase(BaseModel):
)
close_seq_name: Optional[str] = Field(
default=None,
description=("Name of the closest competing hit if there "
"are multiple equaly good hits"),
description=(
"Name of the closest competing hit if there "
"are multiple equaly good hits"
),
)


Expand Down
12 changes: 10 additions & 2 deletions prp/models/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
from enum import Enum
from typing import Dict

from pydantic import BaseModel
from pydantic import BaseModel, Field

from .base import RWModel
from .typing import TypingSoftware


class QcSoftware(Enum):
Expand All @@ -13,6 +14,7 @@ class QcSoftware(Enum):
QUAST = "quast"
FASTQC = "fastqc"
POSTALIGNQC = "postalignqc"
CHEWBBACA = TypingSoftware.CHEWBBACA.value


class QuastQcResult(BaseModel):
Expand Down Expand Up @@ -42,6 +44,12 @@ class PostAlignQcResult(BaseModel):
dup_reads: int


class GenomeCompleteness(BaseModel):
    """Genome completeness QC metrics based on cgMLST alleles."""

    n_missing: int = Field(..., description="Number of missing cgMLST alleles")


class QcMethodIndex(RWModel):
"""QC results container.
Expand All @@ -51,4 +59,4 @@ class QcMethodIndex(RWModel):

software: QcSoftware
version: str | None = None
result: QuastQcResult | PostAlignQcResult
result: QuastQcResult | PostAlignQcResult | GenomeCompleteness
2 changes: 1 addition & 1 deletion prp/models/typing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Typing related data models"""

from enum import Enum
from typing import Dict, List, Optional, Union, Any
from typing import Any, Dict, List, Optional, Union

from pydantic import Field

Expand Down
2 changes: 1 addition & 1 deletion prp/parse/phenotype/mykrobe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
from typing import Any, Dict, Tuple

from ...models.phenotype import ElementTypeResult, ElementType, ElementAmrSubtype
from ...models.phenotype import ElementAmrSubtype, ElementType, ElementTypeResult
from ...models.phenotype import PredictionSoftware as Software
from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType
from ...models.sample import MethodIndex
Expand Down
2 changes: 1 addition & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"""PRP unit tests."""
"""PRP unit tests."""
8 changes: 8 additions & 0 deletions tests/fixtures/ecoli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Ecoli input data fixtures."""

import pytest
import json

from ..fixtures import data_path

Expand Down Expand Up @@ -58,3 +59,10 @@ def ecoli_chewbbaca_path(data_path):
@pytest.fixture()
def ecoli_bracken_path(data_path):
return str(data_path.joinpath("ecoli", "bracken.out"))


@pytest.fixture()
def ecoli_cdm_input(data_path):
    """Return the parsed content of the ecoli ``cdm_input.json`` fixture file."""
    path = data_path.joinpath("ecoli", "cdm_input.json")
    # read as UTF-8 explicitly instead of relying on the locale default encoding
    with open(path, encoding="utf-8") as inpt:
        return json.load(inpt)
46 changes: 46 additions & 0 deletions tests/fixtures/ecoli/cdm_input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
[
{
"software": "postalignqc",
"version": null,
"result": {
"ins_size": 375,
"ins_size_dev": 328,
"mean_cov": 210,
"pct_above_x": {
"1": 99.8908335372424,
"500": 0.0,
"250": 8.28531811957226,
"10": 99.8664503084393,
"1000": 0.0,
"30": 99.8493219487695,
"100": 99.7996110703293
},
"mapped_reads": 6078594,
"tot_reads": 7579030,
"iqr_median": 0.19047619047619,
"dup_pct": 0.0,
"dup_reads": 0
}
},
{
"software": "quast",
"version": null,
"result": {
"total_length": 5103744,
"reference_length": 4641652,
"largest_contig": 359420,
"n_contigs": 109,
"n50": 173071,
"assembly_gc": 50.81,
"reference_gc": 50.79,
"duplication_ratio": 1.001
}
},
{
"software": "chewbbaca",
"version": null,
"result": {
"n_missing": 4228
}
}
]
35 changes: 29 additions & 6 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Test PRP cli functions."""

import json
from click.testing import CliRunner

from prp.cli import create_output, print_schema
from prp.cli import create_bonsai_input, create_cdm_input, print_schema


def test_create_output_saureus(
Expand All @@ -26,7 +27,7 @@ def test_create_output_saureus(
runner = CliRunner()
with runner.isolated_filesystem():
result = runner.invoke(
create_output,
create_bonsai_input,
[
"-i",
sample_id,
Expand All @@ -50,6 +51,7 @@ def test_create_output_saureus(
saureus_mlst_path,
"--cgmlst",
saureus_chewbbaca_path,
"--output",
output_file,
],
)
Expand Down Expand Up @@ -77,7 +79,7 @@ def test_create_output_ecoli(
runner = CliRunner()
with runner.isolated_filesystem():
result = runner.invoke(
create_output,
create_bonsai_input,
[
"-i",
sample_id,
Expand All @@ -101,15 +103,36 @@ def test_create_output_ecoli(
ecoli_mlst_path,
"--cgmlst",
ecoli_chewbbaca_path,
"--output",
output_file,
],
)
assert result.exit_code == 0


def test_print_schema_cmd():
"""Test print schema command."""
def test_cdm_input_cmd(ecoli_quast_path, ecoli_bwa_path, ecoli_chewbbaca_path, ecoli_cdm_input):
"""Test command for creating CDM input."""
runner = CliRunner()
with runner.isolated_filesystem():
result = runner.invoke(print_schema)
output_fname = "test_ouptut"
result = runner.invoke(
create_cdm_input,
[
"--quast",
ecoli_quast_path,
"--quality",
ecoli_bwa_path,
"--cgmlst",
ecoli_chewbbaca_path,
"--output",
output_fname,
],
)

# test successful execution of command
assert result.exit_code == 0

# test correct output format
with open(output_fname) as inpt:
cmd_output = json.load(inpt)
assert cmd_output == ecoli_cdm_input

0 comments on commit 1de5ffb

Please sign in to comment.