Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added CLI command for generating a QC report for CDM #4

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

### Added

- CLI command for generating a QC report for CDM

### Fixed

### Changed
Expand Down
44 changes: 39 additions & 5 deletions prp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from typing import List

import click
from pydantic import ValidationError
from pydantic import TypeAdapter, ValidationError

from .models.metadata import SoupType, SoupVersion
from .models.phenotype import ElementType
from .models.qc import QcMethodIndex
from .models.qc import QcMethodIndex, QcSoftware
from .models.sample import MethodIndex, PipelineResult
from .models.typing import TypingMethod
from .parse import (
Expand Down Expand Up @@ -85,8 +85,8 @@ def cli():
@click.option("-k", "--mykrobe", type=click.File(), help="mykrobe results")
@click.option("-t", "--tbprofiler", type=click.File(), help="tbprofiler results")
@click.option("--correct_alleles", is_flag=True, help="Correct alleles")
@click.argument("output", type=click.File("w"))
def create_output(
@click.option("-o", "--output", required=True, type=click.File("w"), help="output filepath")
def create_bonsai_input(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean the subarg changes from create-output to create-bonsai-input? I.e. prp create-bonsai-input $args?

sample_id,
run_metadata,
quast,
Expand Down Expand Up @@ -247,7 +247,7 @@ def print_schema():


@cli.command()
@click.argument("output", type=click.File("r"))
@click.option("-o", "--output", required=True, type=click.File("r"))
def validate(output):
"""Validate output format of result json file."""
js = json.load(output)
Expand All @@ -258,3 +258,37 @@ def validate(output):
click.secho(err)
else:
click.secho(f'The file "{output.name}" is valid', fg="green")


@cli.command()
@click.option("-q", "--quast", type=click.File(), help="Quast quality control metrics")
@click.option("-p", "--quality", type=click.File(), help="postalignqc qc results")
@click.option("-c", "--cgmlst", type=click.File(), help="cgMLST prediction results")
@click.option("--correct_alleles", is_flag=True, help="Correct alleles")
@click.option("-o", "--output", required=True, type=click.File("w"), help="output filepath")
def create_cdm_input(quast, quality, cgmlst, correct_alleles, output) -> None:
    """Format QC metrics into CDM compatible input file."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # implementation notes live in comments instead.
    #
    # Every input is optional; only the ones that were supplied are parsed.
    # Entries are collected in a fixed order (postalignqc, quast, chewbbaca)
    # and written to `output` as one JSON array of QcMethodIndex records.
    qc_entries: List[QcMethodIndex] = []

    if quality:
        LOG.info("Parse quality results")
        qc_entries.append(parse_postalignqc_results(quality))

    if quast:
        LOG.info("Parse quast results")
        qc_entries.append(parse_quast_results(quast))

    if cgmlst:
        LOG.info("Parse cgmlst results")
        typing_result: MethodIndex = parse_cgmlst_results(
            cgmlst, correct_alleles=correct_alleles
        )
        # Only the number of missing loci is carried over from the typing
        # result; it is re-wrapped as a QC record attributed to chewbbaca.
        missing_loci = {"n_missing": typing_result.result.n_missing}
        qc_entries.append(
            QcMethodIndex(software=QcSoftware.CHEWBBACA, result=missing_loci)
        )

    # A TypeAdapter serializes the whole list of pydantic models in one call.
    serializer = TypeAdapter(List[QcMethodIndex])

    LOG.info("Storing results to: %s", output.name)
    serialized = serializer.dump_json(qc_entries, indent=3)
    output.write(serialized.decode("utf-8"))
    click.secho("Finished generating QC output", fg="green")
6 changes: 4 additions & 2 deletions prp/models/phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,10 @@ class GeneBase(BaseModel):
)
close_seq_name: Optional[str] = Field(
default=None,
description=("Name of the closest competing hit if there "
"are multiple equaly good hits"),
description=(
"Name of the closest competing hit if there "
"are multiple equaly good hits"
),
)


Expand Down
12 changes: 10 additions & 2 deletions prp/models/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
from enum import Enum
from typing import Dict

from pydantic import BaseModel
from pydantic import BaseModel, Field

from .base import RWModel
from .typing import TypingSoftware


class QcSoftware(Enum):
Expand All @@ -13,6 +14,7 @@ class QcSoftware(Enum):
QUAST = "quast"
FASTQC = "fastqc"
POSTALIGNQC = "postalignqc"
CHEWBBACA = TypingSoftware.CHEWBBACA.value


class QuastQcResult(BaseModel):
Expand Down Expand Up @@ -42,6 +44,12 @@ class PostAlignQcResult(BaseModel):
dup_reads: int


class GenomeCompleteness(BaseModel):
    """Genome completeness QC metrics based on cgMLST allele calls."""

    # Count of cgMLST alleles that were not called for the sample.
    n_missing: int = Field(..., description="Number of missing cgMLST alleles")


class QcMethodIndex(RWModel):
"""QC results container.

Expand All @@ -51,4 +59,4 @@ class QcMethodIndex(RWModel):

software: QcSoftware
version: str | None = None
result: QuastQcResult | PostAlignQcResult
result: QuastQcResult | PostAlignQcResult | GenomeCompleteness
2 changes: 1 addition & 1 deletion prp/models/typing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Typing related data models"""

from enum import Enum
from typing import Dict, List, Optional, Union, Any
from typing import Any, Dict, List, Optional, Union

from pydantic import Field

Expand Down
2 changes: 1 addition & 1 deletion prp/parse/phenotype/mykrobe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
from typing import Any, Dict, Tuple

from ...models.phenotype import ElementTypeResult, ElementType, ElementAmrSubtype
from ...models.phenotype import ElementAmrSubtype, ElementType, ElementTypeResult
from ...models.phenotype import PredictionSoftware as Software
from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType
from ...models.sample import MethodIndex
Expand Down
2 changes: 1 addition & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"""PRP unit tests."""
"""PRP unit tests."""
8 changes: 8 additions & 0 deletions tests/fixtures/ecoli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Ecoli input data fixutres."""

import pytest
import json

from ..fixtures import data_path

Expand Down Expand Up @@ -58,3 +59,10 @@ def ecoli_chewbbaca_path(data_path):
@pytest.fixture()
def ecoli_bracken_path(data_path):
return str(data_path.joinpath("ecoli", "bracken.out"))


@pytest.fixture()
def ecoli_cdm_input(data_path):
    """Return the parsed content of the ecoli CDM input JSON fixture.

    Reads ``ecoli/cdm_input.json`` located below ``data_path`` and returns
    the decoded JSON document.
    """
    path = data_path.joinpath("ecoli", "cdm_input.json")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as inpt:
        return json.load(inpt)
46 changes: 46 additions & 0 deletions tests/fixtures/ecoli/cdm_input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
[
{
"software": "postalignqc",
"version": null,
"result": {
"ins_size": 375,
"ins_size_dev": 328,
"mean_cov": 210,
"pct_above_x": {
"1": 99.8908335372424,
"500": 0.0,
"250": 8.28531811957226,
"10": 99.8664503084393,
"1000": 0.0,
"30": 99.8493219487695,
"100": 99.7996110703293
},
"mapped_reads": 6078594,
"tot_reads": 7579030,
"iqr_median": 0.19047619047619,
"dup_pct": 0.0,
"dup_reads": 0
}
},
{
"software": "quast",
"version": null,
"result": {
"total_length": 5103744,
"reference_length": 4641652,
"largest_contig": 359420,
"n_contigs": 109,
"n50": 173071,
"assembly_gc": 50.81,
"reference_gc": 50.79,
"duplication_ratio": 1.001
}
},
{
"software": "chewbbaca",
"version": null,
"result": {
"n_missing": 4228
}
}
]
35 changes: 29 additions & 6 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Test PRP cli functions."""

import json
from click.testing import CliRunner

from prp.cli import create_output, print_schema
from prp.cli import create_bonsai_input, create_cdm_input, print_schema


def test_create_output_saureus(
Expand All @@ -26,7 +27,7 @@ def test_create_output_saureus(
runner = CliRunner()
with runner.isolated_filesystem():
result = runner.invoke(
create_output,
create_bonsai_input,
[
"-i",
sample_id,
Expand All @@ -50,6 +51,7 @@ def test_create_output_saureus(
saureus_mlst_path,
"--cgmlst",
saureus_chewbbaca_path,
"--output",
output_file,
],
)
Expand Down Expand Up @@ -77,7 +79,7 @@ def test_create_output_ecoli(
runner = CliRunner()
with runner.isolated_filesystem():
result = runner.invoke(
create_output,
create_bonsai_input,
[
"-i",
sample_id,
Expand All @@ -101,15 +103,36 @@ def test_create_output_ecoli(
ecoli_mlst_path,
"--cgmlst",
ecoli_chewbbaca_path,
"--output",
output_file,
],
)
assert result.exit_code == 0


def test_print_schema_cmd():
"""Test print schema command."""
def test_cdm_input_cmd(ecoli_quast_path, ecoli_bwa_path, ecoli_chewbbaca_path, ecoli_cdm_input):
"""Test command for creating CDM input."""
runner = CliRunner()
with runner.isolated_filesystem():
result = runner.invoke(print_schema)
output_fname = "test_ouptut"
result = runner.invoke(
create_cdm_input,
[
"--quast",
ecoli_quast_path,
"--quality",
ecoli_bwa_path,
"--cgmlst",
ecoli_chewbbaca_path,
"--output",
output_fname,
],
)

# test successful execution of command
assert result.exit_code == 0

# test correct output format
with open(output_fname) as inpt:
cmd_output = json.load(inpt)
assert cmd_output == ecoli_cdm_input
Loading