Skip to content

Commit

Permalink
Merge pull request #68 from Clinical-Genomics-Lund/57-add-shigapass-t…
Browse files Browse the repository at this point in the history
…o-output

Add shigapass to output
  • Loading branch information
ryanjameskennedy authored May 29, 2024
2 parents 4a73247 + 81c4a48 commit 13e3b3c
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

### Added

- Added Shigapass output

### Fixed

### Changed
Expand Down
10 changes: 10 additions & 0 deletions prp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
parse_quast_results,
parse_resfinder_amr_pred,
parse_serotypefinder_oh_typing,
parse_shigapass_pred,
parse_tbprofiler_amr_pred,
parse_tbprofiler_lineage_results,
parse_virulencefinder_stx_typing,
Expand Down Expand Up @@ -104,6 +105,7 @@ def cli():
)
@click.option("-p", "--quality", type=click.Path(), help="postalignqc qc results")
@click.option("-k", "--mykrobe", type=click.Path(), help="mykrobe results")
@click.option("-g", "--shigapass", type=click.Path(), help="shigapass results")
@click.option("-t", "--tbprofiler", type=click.Path(), help="tbprofiler results")
@click.option("--bam", type=click.Path(), help="Read mapping to reference genome")
@click.option(
Expand Down Expand Up @@ -139,6 +141,7 @@ def create_bonsai_input(
serotypefinder,
quality,
mykrobe,
shigapass,
tbprofiler,
bam,
reference_genome_fasta,
Expand Down Expand Up @@ -229,6 +232,13 @@ def create_bonsai_input(
if res is not None:
results["typing_result"].extend(res)

if shigapass:
LOG.info("Parse shigapass results")
# Shigatyping
res: MethodIndex | None = parse_shigapass_pred(shigapass, TypingMethod.SHIGATYPE)
if res is not None:
results["typing_result"].extend(res)

# species id
results["species_prediction"] = []
if kraken:
Expand Down
16 changes: 15 additions & 1 deletion prp/models/phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class PhenotypeInfo(RWModel):


class DatabaseReference(RWModel):
"""Refernece to a database."""
"""Reference to a database."""

ref_database: Optional[str] = None
ref_id: Optional[str] = None
Expand Down Expand Up @@ -185,6 +185,20 @@ class SerotypeGene(GeneBase):
"""Container for serotype gene information"""


class Shigatype(BaseModel):
    """Container for shigatype gene information.

    The fields correspond to the columns of one row of a ShigaPass result
    file as mapped by the ShigaPass parser. Every field is optional and
    defaults to ``None``.
    """

    # Taken from the "rfb" column.
    rfb: Optional[str] = None
    # Percentage extracted from the parenthesised part of the "rfb_hits,(%)" column.
    rfb_hits: Optional[float] = None
    # Taken from the "MLST" column.
    mlst: Optional[str] = None
    # Taken from the "fliC" column.
    flic: Optional[str] = None
    # Taken from the "CRISPR" column.
    crispr: Optional[str] = None
    # Taken from the "ipaH" column.
    ipah: Optional[str] = None
    # Taken from the "Predicted_Serotype" column.
    predicted_serotype: Optional[str] = None
    # Taken from the "Predicted_FlexSerotype" column.
    predicted_flex_serotype: Optional[str] = None
    # Taken from the "Comments" column.
    comments: Optional[str] = None


class VirulenceGene(GeneBase, DatabaseReference):
"""Container for virulence gene information"""

Expand Down
2 changes: 2 additions & 0 deletions prp/models/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
TypingResultCgMlst,
TypingResultGeneAllele,
TypingResultMlst,
TypingResultShiga,
TypingSoftware,
)

Expand All @@ -29,6 +30,7 @@ class MethodIndex(RWModel):
TypingResultMlst,
TypingResultCgMlst,
TypingResultGeneAllele,
TypingResultShiga,
TbProfilerLineage,
ResultLineageBase,
]
Expand Down
8 changes: 7 additions & 1 deletion prp/models/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pydantic import Field

from .base import RWModel
from .phenotype import SerotypeGene, VirulenceGene
from .phenotype import SerotypeGene, VirulenceGene, Shigatype


class TypingSoftware(Enum):
Expand All @@ -18,6 +18,7 @@ class TypingSoftware(Enum):
MYKROBE = "mykrobe"
VIRULENCEFINDER = "virulencefinder"
SEROTYPEFINDER = "serotypefinder"
SHIGAPASS = "shigapass"


class TypingMethod(Enum):
Expand All @@ -29,6 +30,7 @@ class TypingMethod(Enum):
STX = "stx"
OTYPE = "O_type"
HTYPE = "H_type"
SHIGATYPE = "shigatype"


class ChewbbacaErrors(str, Enum):
Expand Down Expand Up @@ -71,6 +73,10 @@ class TypingResultCgMlst(ResultMlstBase):
n_missing: int = Field(0, alias="nNovel")


class TypingResultShiga(Shigatype):
    """Shigatype results.

    Declares no fields of its own; it inherits everything from ``Shigatype``
    and is the ``result`` payload of the ``MethodIndex`` entries built by the
    ShigaPass parser.
    """


class ResultLineageBase(RWModel):
"""Lineage results"""

Expand Down
1 change: 1 addition & 0 deletions prp/parse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
parse_amrfinder_vir_pred,
parse_mykrobe_amr_pred,
parse_resfinder_amr_pred,
parse_shigapass_pred,
parse_tbprofiler_amr_pred,
parse_virulencefinder_vir_pred,
)
Expand Down
1 change: 1 addition & 0 deletions prp/parse/phenotype/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
from .amrfinder import parse_amrfinder_amr_pred, parse_amrfinder_vir_pred
from .mykrobe import parse_mykrobe_amr_pred
from .resfinder import parse_resfinder_amr_pred
from .shigapass import parse_shigapass_pred
from .tbprofiler import parse_tbprofiler_amr_pred
from .virulencefinder import parse_virulencefinder_vir_pred
66 changes: 66 additions & 0 deletions prp/parse/phenotype/shigapass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Functions for parsing shigapass result."""
import re
import logging
import pandas as pd
import numpy as np
from typing import Tuple

from ...models.phenotype import (
ElementType,
ElementTypeResult,
Shigatype,
)
from ...models.typing import TypingSoftware as Software
from ...models.typing import TypingResultShiga
from ...models.sample import MethodIndex

LOG = logging.getLogger(__name__)

def parse_shigapass_pred(file: str, element_type: ElementType) -> list[MethodIndex]:
    """Parse ShigaPass prediction results into typing results.

    Args:
        file: Path to the semicolon-delimited ShigaPass result file.
        element_type: Method label stored as ``type`` on every returned
            entry (the CLI passes ``TypingMethod.SHIGATYPE``).

    Returns:
        One ``MethodIndex`` per row of the file, each wrapping a
        ``TypingResultShiga`` and tagged with ``Software.SHIGAPASS``.
        The list is empty when the file has a header but no rows.
    """
    LOG.info("Parsing shigapass prediction")
    # pandas opens and closes the file itself when handed a path, so no
    # explicit (binary) file handle is needed.
    hits = pd.read_csv(file, delimiter=";")
    hits = hits.rename(
        columns={
            "Name": "sample_name",
            "rfb_hits,(%)": "rfb_hits",
            "MLST": "mlst",
            "fliC": "flic",
            "CRISPR": "crispr",
            "ipaH": "ipah",
            "Predicted_Serotype": "predicted_serotype",
            "Predicted_FlexSerotype": "predicted_flex_serotype",
            "Comments": "comments",
        }
    )
    # ShigaPass placeholders for "no call" and pandas missing values are
    # normalised to None so the optional model fields stay empty.
    hits = hits.replace(["ND", "none", np.nan], [None, None, None])

    pred_result = []
    # Iterate over index labels because the row helper looks rows up with
    # the label-based ``.loc`` accessor.
    for row_label in hits.index:
        shigatype_results = _parse_shigapass_results(hits, row_label)
        result = TypingResultShiga(**shigatype_results.model_dump())
        pred_result.append(
            MethodIndex(type=element_type, result=result, software=Software.SHIGAPASS)
        )
    return pred_result

def _extract_percentage(rfb_hits: str) -> float:
    """Return the percentage embedded in a ShigaPass ``rfb_hits`` value.

    The text is searched for a parenthesised percentage such as ``(85.7%)``
    or ``(100%)``.

    Args:
        rfb_hits: Raw cell content, already converted to a string.

    Returns:
        The percentage as a float, or ``0.0`` when the text contains no
        parenthesised percentage.
    """
    # The fractional part is optional so whole-number percentages such as
    # "(100%)" are recognised instead of silently becoming 0.0.
    match = re.search(r"\((\d+(?:\.\d+)?)%\)", rfb_hits)
    if match is None:
        return 0.0
    # Always hand back a float so both branches share one return type.
    return float(match.group(1))

def _parse_shigapass_results(predictions: pd.DataFrame, row: int) -> Shigatype:
    """Build a ``Shigatype`` from one row of the renamed ShigaPass table.

    Args:
        predictions: Table whose columns have already been renamed to the
            ``Shigatype`` field names.
        row: Index label of the row to convert (looked up with ``.loc``).

    Returns:
        A ``Shigatype`` populated from the selected row; missing cells
        become ``None``.
    """

    def cell(column: str):
        """Return the cell at (row, column), mapping missing values to None."""
        value = predictions.loc[row, column]
        # NaN/NaT/None are all "missing"; the model's optional string fields
        # expect None for missing values, not a float NaN.
        return None if pd.isna(value) else value

    return Shigatype(
        rfb=cell("rfb"),
        # The percentage is embedded in free text, so it is extracted from
        # the string form of the cell.
        rfb_hits=_extract_percentage(str(cell("rfb_hits"))),
        mlst=cell("mlst"),
        flic=cell("flic"),
        crispr=cell("crispr"),
        ipah=cell("ipah"),
        predicted_serotype=cell("predicted_serotype"),
        predicted_flex_serotype=cell("predicted_flex_serotype"),
        comments=cell("comments"),
    )

0 comments on commit 13e3b3c

Please sign in to comment.