From b90fb4dfb1c98df42be045fe0bcac042a792b645 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 3 Jan 2024 13:09:23 +0100 Subject: [PATCH] Fix parsers --- prp/parse/phenotype/mykrobe.py | 20 ++++--------- prp/parse/phenotype/resfinder.py | 8 +---- prp/parse/phenotype/tbprofiler.py | 7 ++--- prp/parse/phenotype/utils.py | 49 +------------------------------ prp/parse/typing.py | 4 +-- 5 files changed, 12 insertions(+), 76 deletions(-) diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index bdf7886..fc03013 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -7,7 +7,7 @@ from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType from ...models.sample import MethodIndex -from .utils import is_prediction_result_empty, _default_amr_phenotype +from .utils import is_prediction_result_empty LOG = logging.getLogger(__name__) @@ -45,12 +45,9 @@ def _parse_mykrobe_amr_genes(mykrobe_result) -> Tuple[ResistanceGene, ...]: gene = ResistanceGene( gene_symbol=element_type["variants"].split("_")[0], - accession=None, depth=depth, - identity=None, coverage=coverage, drugs=[element_type["drug"].lower()], - phenotypes=[_default_amr_phenotype()], element_type=ElementType.AMR, element_subtype=ElementAmrSubtype.AMR, ) @@ -99,7 +96,7 @@ def _parse_mykrobe_amr_variants(mykrobe_result) -> Tuple[ResistanceVariant, ...] if not element_type["susceptibility"].upper() == "R": continue - if element_type["variants"] is not None: + if element_type["variants"] is None: continue try: @@ -107,25 +104,18 @@ def _parse_mykrobe_amr_variants(mykrobe_result) -> Tuple[ResistanceVariant, ...] except AttributeError: depth = None - var_info = element_type["variants"].split("-")[1] + var_info = element_type["variants"].split("-")[1].split(":")[0] _, ref_nt, alt_nt, position = get_mutation_type(var_info) var_nom = element_type["variants"].split("-")[0].split("_")[1] var_type, *_ = get_mutation_type(var_nom) variant = ResistanceVariant( variant_type=var_type, - genes=[element_type["variants"].split("_")[0]], - phenotypes=[_default_amr_phenotype()], + gene_symbol=element_type["variants"].split("_")[0], position=position, ref_nt=ref_nt, alt_nt=alt_nt, depth=depth, - ref_database=None, - ref_id=None, - type=None, change=var_nom, - nucleotide_change=None, - protein_change=None, - annotation=None, drugs=[element_type["drug"].lower()], ) results.append(variant) @@ -139,7 +129,7 @@ def parse_mykrobe_amr_pred( LOG.info("Parsing mykrobe prediction") resistance = ElementTypeResult( phenotypes=_get_mykrobe_amr_sr_profie(prediction), - genes=_parse_mykrobe_amr_genes(prediction), + genes=[], mutations=_parse_mykrobe_amr_variants(prediction), ) diff --git a/prp/parse/phenotype/resfinder.py b/prp/parse/phenotype/resfinder.py index 17f03d7..aa9c224 100644 --- a/prp/parse/phenotype/resfinder.py +++ b/prp/parse/phenotype/resfinder.py @@ -14,7 +14,6 @@ from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType from ...models.sample import MethodIndex -from .utils import _default_resistance LOG = logging.getLogger(__name__) @@ -221,7 +220,7 @@ def _parse_resfinder_amr_genes( """Get resistance genes from resfinder result.""" results = [] if not "seq_regions" in resfinder_result: - return _default_resistance().genes + return [ResistanceGene()] for info in resfinder_result["seq_regions"].values(): # Get only acquired resistance genes @@ -327,7 +326,6 @@ def _parse_resfinder_amr_variants( ) -> Tuple[ResistanceVariant, ...]: """Get resistance genes from resfinder result.""" results = [] - igenes = [] for info in resfinder_result["seq_variations"].values(): # Get only variants from desired phenotypes if limit_to_phenotypes is not None: @@ -350,9 +348,6 @@ def _parse_resfinder_amr_variants( var_type = VariantType.DELETION else: raise ValueError("Output has no known mutation type") - if not "seq_regions" in info: - # igenes = _default_resistance().genes - igenes = [""] # get gene symbol and accession nr gene_symbol, _, gene_accnr = info["seq_regions"][0].split(";;") @@ -376,7 +371,6 @@ def _parse_resfinder_amr_variants( gene_symbol=gene_symbol, accession=gene_accnr, close_seq_name=gene_accnr, - genes=igenes, phenotypes=phenotype, position=info["ref_start_pos"], ref_nt=ref_nt, diff --git a/prp/parse/phenotype/tbprofiler.py b/prp/parse/phenotype/tbprofiler.py index 7fbc746..1a62021 100644 --- a/prp/parse/phenotype/tbprofiler.py +++ b/prp/parse/phenotype/tbprofiler.py @@ -7,7 +7,6 @@ from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceVariant from ...models.sample import MethodIndex -from .utils import _default_variant, _default_amr_phenotype LOG = logging.getLogger(__name__) @@ -53,8 +52,8 @@ def _parse_tbprofiler_amr_variants(tbprofiler_result) -> Tuple[ResistanceVariant variant = ResistanceVariant( variant_type=var_type, - genes=[hit["gene"]], - phenotypes=[_default_amr_phenotype()], + gene_symbol=hit["gene"], + phenotypes=[], position=int(hit["genome_pos"]), ref_nt=hit["ref"], alt_nt=hit["alt"], @@ -69,7 +68,7 @@ def _parse_tbprofiler_amr_variants(tbprofiler_result) -> Tuple[ResistanceVariant results.append(variant) if not results: - results = _default_variant().mutations + results = [ResistanceVariant()] return results return results diff --git a/prp/parse/phenotype/utils.py b/prp/parse/phenotype/utils.py index 2b0a3b0..6ea7bd7 100644 --- a/prp/parse/phenotype/utils.py +++ b/prp/parse/phenotype/utils.py @@ -1,55 +1,8 @@ """Shared utility functions.""" -from ...models.phenotype import ElementTypeResult, ResistanceGene +from ...models.phenotype import ElementTypeResult from ...models.phenotype import ElementType, PhenotypeInfo -def _default_resistance() -> ElementTypeResult: - gene = ResistanceGene( - name=None, - virulence_category=None, - accession=None, - depth=None, - identity=None, - coverage=None, - ref_start_pos=None, - ref_end_pos=None, - ref_gene_length=None, - alignment_length=None, - ref_database=None, - phenotypes=[], - ref_id=None, - contig_id=None, - sequence_name=None, - ass_start_pos=None, - ass_end_pos=None, - strand=None, - element_type=None, - element_subtype=None, - target_length=None, - res_class=None, - res_subclass=None, - method=None, - close_seq_name=None, - ) - genes = [ - gene, - ] - return ElementTypeResult(phenotypes=[], genes=genes, mutations=[]) - - -def _default_variant() -> ElementTypeResult: - mutation = ResistanceGene( - variant_type=None, - genes=None, - phenotypes=[], - position=None, - ref_nt=None, - alt_nt=None, - depth=None, - ) - mutations = [mutation] - return ElementTypeResult(phenotypes=[], genes=[], mutations=mutations) - def _default_amr_phenotype() -> PhenotypeInfo: return PhenotypeInfo( type = ElementType.AMR, diff --git a/prp/parse/typing.py b/prp/parse/typing.py index 8346963..1790e29 100644 --- a/prp/parse/typing.py +++ b/prp/parse/typing.py @@ -176,8 +176,8 @@ def parse_virulencefinder_stx_typing(path: str) -> MethodIndex | None: vir_gene = parse_vir_gene(hit) gene = TypingResultGeneAllele(**vir_gene.model_dump()) pred_result = MethodIndex( - type=TypingMethod.STX, - software=Software.VIRULENCEFINDER, + type=TypingMethod.STX, + software=Software.VIRULENCEFINDER, result=gene ) return pred_result