diff --git a/CHANGELOG.md b/CHANGELOG.md index 094873a..dea76e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Added ### Fixed + - Handle alt types for emmtyper ### Changed diff --git a/prp/models/typing.py b/prp/models/typing.py index 4d04e00..4307ee8 100644 --- a/prp/models/typing.py +++ b/prp/models/typing.py @@ -103,9 +103,9 @@ class TypingResultEmm(RWModel): """Container for emmtype gene information""" cluster_count: int - emmtype: str - emm_like_alleles: list[str] - emm_cluster: str + emmtype: str | None = None + emm_like_alleles: list[str] | None = None + emm_cluster: str | None = None class EmmTypingMethodIndex(RWModel): diff --git a/prp/parse/phenotype/emmtyper.py b/prp/parse/phenotype/emmtyper.py index 0e4049a..65eefa3 100644 --- a/prp/parse/phenotype/emmtyper.py +++ b/prp/parse/phenotype/emmtyper.py @@ -16,6 +16,7 @@ def parse_emmtyper_pred(path: str) -> EmmTypingMethodIndex: pred_result = [] df = pd.read_csv(path, sep='\t', header=None) df.columns = ["sample_name", "cluster_count", "emmtype", "emm_like_alleles", "emm_cluster"] + df.replace(["-", ""], None, inplace=True) df_loa = df.to_dict(orient="records") for emmtype_array in df_loa: emmtype_results = _parse_emmtyper_results(emmtype_array) @@ -31,10 +32,9 @@ def parse_emmtyper_pred(path: str) -> EmmTypingMethodIndex: def _parse_emmtyper_results(info: dict[str, Any]) -> TypingResultEmm: """Parse emm gene prediction results.""" - emm_like_alleles = info["emm_like_alleles"].split(";") + emm_like_alleles = info["emm_like_alleles"].split(";") if not pd.isna(info["emm_like_alleles"]) else None return TypingResultEmm( - # info - cluster_count=info["cluster_count"], + cluster_count=int(info["cluster_count"]), emmtype=info["emmtype"], emm_like_alleles=emm_like_alleles, emm_cluster=info["emm_cluster"],