diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index fe1f81d..7e1a18e 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -50,7 +50,7 @@ jobs: runs-on: ubuntu-latest environment: name: pypi - url: https://pypi.org/p/ga4gh-va-spec + url: https://pypi.org/p/ga4gh.va_spec permissions: id-token: write # IMPORTANT: mandatory for trusted publishing steps: diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..9d51a2b --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "submodules/va_spec"] + path = submodules/va_spec + url = https://github.com/ga4gh/va-spec + branch = 1.x diff --git a/pyproject.toml b/pyproject.toml index 5cbda40..5374c81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,11 @@ keywords = [ ] requires-python = ">=3.10" dynamic = ["version"] -dependencies = [] +dependencies = [ + "ga4gh.vrs~=2.0.0a12", + "ga4gh.cat_vrs~=0.1.0", + "pydantic==2.*" +] [project.optional-dependencies] dev = [ @@ -132,7 +136,9 @@ ignore = [ # ANN102 - missing-type-cls # S101 - assert # B011 - assert-false +# N815 - mixed-case-variable-in-class-scope "tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "B011"] +"src/ga4gh/va_spec/profiles/*" = ["ANN102", "N815"] [tool.setuptools.packages.find] where = ["src"] diff --git a/src/ga4gh/va_spec/profiles/__init__.py b/src/ga4gh/va_spec/profiles/__init__.py new file mode 100644 index 0000000..558a390 --- /dev/null +++ b/src/ga4gh/va_spec/profiles/__init__.py @@ -0,0 +1,44 @@ +"""Package for VA-Spec Python implementation""" + +from .assay_var_effect import ( + AssayVariantEffectClinicalClassificationStatement, + AssayVariantEffectFunctionalClassificationStatement, + AssayVariantEffectMeasurementStudyResult, + AveClinicalClassification, + AveFunctionalClassification, +) +from .caf_study_result import CohortAlleleFrequencyStudyResult +from .var_path_stmt import PenetranceQualifier, VariantPathogenicityStatement +from .var_study_stmt import ( + AlleleOriginQualifier, + AllelePrevalenceQualifier, + DiagnosticPredicate, + OncogenicPredicate, + PrognosticPredicate, + TherapeuticResponsePredicate, + VariantDiagnosticStudyStatement, + VariantOncogenicityStudyStatement, + VariantPrognosticStudyStatement, + VariantTherapeuticResponseStudyStatement, +) + +__all__ = [ + "AveFunctionalClassification", + "AveClinicalClassification", + "AssayVariantEffectFunctionalClassificationStatement", + "AssayVariantEffectClinicalClassificationStatement", + "AssayVariantEffectMeasurementStudyResult", + "CohortAlleleFrequencyStudyResult", + "PenetranceQualifier", + "VariantPathogenicityStatement", + "AlleleOriginQualifier", + "DiagnosticPredicate", + "OncogenicPredicate", + "PrognosticPredicate", + "TherapeuticResponsePredicate", + "AllelePrevalenceQualifier", + "VariantDiagnosticStudyStatement", + "VariantOncogenicityStudyStatement", + "VariantPrognosticStudyStatement", + "VariantTherapeuticResponseStudyStatement", +] diff --git a/src/ga4gh/va_spec/profiles/assay_var_effect.py b/src/ga4gh/va_spec/profiles/assay_var_effect.py new file mode 100644 index 0000000..5225f98 --- /dev/null +++ b/src/ga4gh/va_spec/profiles/assay_var_effect.py @@ -0,0 +1,134 @@ +"""VA Spec Assay Variant Effect statement and study result Profiles""" + +from __future__ import annotations + +from enum import Enum +from typing import Literal + +from ga4gh.cat_vrs.core_models import CategoricalVariant +from ga4gh.core.entity_models import ( + IRI, + Coding, + DataSet, + Method, + StatementBase, + StudyGroup, + StudyResult, + StudyResultBase, +) +from ga4gh.vrs.models import MolecularVariation +from pydantic import ConfigDict, Field + + +class AveFunctionalClassification(str, Enum): + """The functional classification of the variant effect in the assay.""" + + NORMAL = "normal" + INDETERMINATE = "indeterminate" + ABNORMAL = "abnormal" + + +class AveClinicalClassification(str, Enum): + """The clinical strength of evidence of the variant effect in the assay.""" + + PS3_STRONG = "PS3_Strong" + PS3_MODERATE = "PS3_Moderate" + PS3_SUPPORTING = "PS3_Supporting" + BS3_STRONG = "BS3_Strong" + BS3_MODERATE = "BS3_Moderate" + BS3_SUPPORTING = "BS3_Supporting" + + +class AssayVariantEffectFunctionalClassificationStatement(StatementBase): + """A statement that assigns a functional classification to a variant effect from a functional assay.""" + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["AssayVariantEffectFunctionalClassificationStatement"] = Field( + "AssayVariantEffectFunctionalClassificationStatement", + description="MUST be 'AssayVariantEffectFunctionalClassificationStatement'.", + ) + subjectVariant: MolecularVariation | CategoricalVariant | IRI = Field( + ..., + description="A protein or genomic contextual or canonical molecular variant.", + ) + predicate: Literal["hasAssayVariantEffectFor"] = Field( + "hasAssayVariantEffectFor", + description="The relationship declared to hold between the subject and the object of the Statement.", + ) + objectAssay: IRI | Coding = Field( + ..., + description="The assay that is evaluated for the variant effect. (e.g growth in haploid cell culture protein stability in fluorescence assay)", + ) + classification: AveFunctionalClassification = Field( + ..., + description="The functional classification of the variant effect in the assay.", + ) + specifiedBy: Method | IRI | None = Field( + None, + description="The method that specifies the functional classification of the variant effect in the assay.", + ) + + +class AssayVariantEffectClinicalClassificationStatement(StatementBase): + """A statement that assigns a clinical strength of evidence to a variant effect from a functional assay.""" + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["AssayVariantEffectClinicalClassificationStatement"] = Field( + "AssayVariantEffectClinicalClassificationStatement", + description="MUST be 'AssayVariantEffectClinicalClassificationStatement'.", + ) + subjectVariant: MolecularVariation | CategoricalVariant | IRI = Field( + ..., + description="A protein or genomic contextual or canonical molecular variant.", + ) + predicate: Literal["hasAssayVariantEffectFor"] = Field( + "hasAssayVariantEffectFor", + description="The relationship declared to hold between the subject and the object of the Statement.", + ) + objectAssay: IRI | Coding = Field( + ..., + description="The assay that is evaluated for the variant effect. (e.g growth in haploid cell culture protein stability in fluorescence assay)", + ) + classification: AveClinicalClassification = Field( + ..., + description="The clinical strength of evidence of the variant effect in the assay.", + ) + specifiedBy: Method | IRI | None = Field( + None, + description="The method that specifies the clinical strength of evidence of the variant effect in the assay.", + ) + + +class AssayVariantEffectMeasurementStudyResult(StudyResultBase): + """A StudyResult that reports a variant effect score from a functional assay.""" + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["AssayVariantEffectMeasurementStudyResult"] = Field( + "AssayVariantEffectMeasurementStudyResult", + description="MUST be 'AssayVariantEffectMeasurementStudyResult'.", + ) + componentResult: list[StudyResult] | None = Field( + None, + description="Another StudyResult comprised of data items about the same focus as its parent Result, but based on a more narrowly scoped analysis of the foundational data (e.g. an analysis based on data about a subset of the parent Results full study population) .", + ) + studyGroup: StudyGroup | None = Field( + None, + description="A description of a specific group or population of subjects interrogated in the ResearchStudy that produced the data captured in the StudyResult.", + ) + focusVariant: MolecularVariation | IRI | None = Field( + None, + description="The human mapped representation of the variant that is the subject of the Statement.", + ) + score: float | None = Field( + None, description="The score of the variant effect in the assay." + ) + specifiedBy: Method | IRI | None = Field( + None, + description="The assay that was used to measure the variant effect with all the various properties", + ) + sourceDataSet: list[DataSet] | None = Field( + None, description="The full data set that this measurement is a part of" + ) diff --git a/src/ga4gh/va_spec/profiles/caf_study_result.py b/src/ga4gh/va_spec/profiles/caf_study_result.py new file mode 100644 index 0000000..ecd348d --- /dev/null +++ b/src/ga4gh/va_spec/profiles/caf_study_result.py @@ -0,0 +1,49 @@ +"""VA Spec Cohort Allele Frequency (population frequency) Study Result Standard Profile""" + +from __future__ import annotations + +from typing import Literal + +from ga4gh.core.entity_models import ( + DataSet, + StudyResult, + StudyResultBase, +) +from ga4gh.vrs.models import Allele +from pydantic import ConfigDict, Field + + +class CohortAlleleFrequencyStudyResult(StudyResultBase): + """A StudyResult that reports measures related to the frequency of an Allele in a cohort""" + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["CohortAlleleFrequencyStudyResult"] = Field( + "CohortAlleleFrequencyStudyResult", + description="MUST be 'CohortAlleleFrequencyStudyResult'.", + ) + sourceDataSet: list[DataSet] | None = Field( + None, + description="The dataset from which the CohortAlleleFrequencyStudyResult was reported.", + ) + focusAllele: Allele | str = Field( + ..., + description="The specific subject or experimental unit in a Study that data in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.", + ) + focusAlleleCount: int = Field( + ..., description="The number of occurrences of the focusAllele in the cohort." + ) + locusAlleleCount: int = Field( + ..., + description="The number of occurrences of all alleles at the locus in the cohort (sometimes referred to as 'allele number')", + ) + focusAlleleFrequency: float = Field( + ..., description="The frequency of the focusAllele in the cohort." + ) + cohort: list[StudyResult] = Field( + ..., description="The cohort from which the frequency was derived." + ) + subCohortFrequency: list[CohortAlleleFrequencyStudyResult] | None = Field( + None, + description="A list of CohortAlleleFrequency objects describing subcohorts of the cohort currently being described. This creates a recursive relationship and subcohorts can be further subdivided into more subcohorts. This enables, for example, the description of different ancestry groups and sexes among those ancestry groups.", + ) diff --git a/src/ga4gh/va_spec/profiles/var_path_stmt.py b/src/ga4gh/va_spec/profiles/var_path_stmt.py new file mode 100644 index 0000000..6bf536d --- /dev/null +++ b/src/ga4gh/va_spec/profiles/var_path_stmt.py @@ -0,0 +1,54 @@ +"""VA Spec Variant Pathogenicity Statement Standard Profile""" + +from enum import Enum +from typing import Literal + +from ga4gh.cat_vrs.core_models import CategoricalVariant +from ga4gh.core.domain_models import Condition, Gene +from ga4gh.core.entity_models import IRI, Coding, StatementBase +from ga4gh.vrs.models import Variation +from pydantic import ConfigDict, Field + + +class PenetranceQualifier(str, Enum): + """Reports the penetrance of the pathogenic effect - i.e. the extent to which the + variant impact is expressed by individuals carrying it as a measure of the + proportion of carriers exhibiting the condition. + """ + + HIGH = "high" + LOW = "low" + RISK_ALLELE = "risk allele" + + +class VariantPathogenicityStatement(StatementBase): + """A Statement describing the role of a variant in causing an inherited condition.""" + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["VariantPathogenicityStatement"] = Field( + "VariantPathogenicityStatement", + description="MUST be 'VariantPathogenicityStatement'.", + ) + subjectVariant: Variation | CategoricalVariant | IRI = Field( + ..., description="A variant that is the subject of the Statement." + ) + predicate: Literal["isCausalFor"] = Field( + "isCausalFor", + description="The relationship declared to hold between the subject and the object of the Statement.", + ) + objectCondition: Condition | IRI = Field( + ..., description="The Condition for which the variant impact is stated." + ) + penetranceQualifier: PenetranceQualifier | None = Field( + None, + description="Reports the penetrance of the pathogenic effect - i.e. the extent to which the variant impact is expressed by individuals carrying it as a measure of the proportion of carriers exhibiting the condition.", + ) + modeOfInheritanceQualifier: list[Coding] | None = Field( + None, + description="Reports a pattern of inheritance expected for the pathogenic effect of the variant. Use HPO terms within the hierarchy of 'HP:0000005' (mode of inheritance) to specify.", + ) + geneContextQualifier: Gene | IRI | None = Field( + None, + description="Reports the gene through which the pathogenic effect asserted for the variant is mediated (i.e. it is the variant's impact on this gene that is responsible for causing the condition).", + ) diff --git a/src/ga4gh/va_spec/profiles/var_study_stmt.py b/src/ga4gh/va_spec/profiles/var_study_stmt.py new file mode 100644 index 0000000..bb7c7f9 --- /dev/null +++ b/src/ga4gh/va_spec/profiles/var_study_stmt.py @@ -0,0 +1,208 @@ +"""VA Spec Variant Study Statement Standard Profiles""" + +from enum import Enum +from typing import Literal + +from ga4gh.cat_vrs.core_models import CategoricalVariant +from ga4gh.core.domain_models import Condition, Gene, TherapeuticProcedure +from ga4gh.core.entity_models import IRI, StatementBase +from ga4gh.vrs.models import Variation +from pydantic import ConfigDict, Field + + +class AlleleOriginQualifier(str, Enum): + """Reports whether the statement should be interpreted in the context of an + inherited (germline) variant, an acquired (somatic) mutation, or both (combined). + """ + + GERMLINE = "germline" + SOMATIC = "somatic" + COMBINED = "combined" + + +class DiagnosticPredicate(str, Enum): + """Define constraints for diagnostic predicate""" + + INCLUSIVE = "isDiagnosticInclusionCriterionFor" + EXCLUSIVE = "isDiagnosticExclusionCriterionFor" + + +class OncogenicPredicate(str, Enum): + """Define constraints for oncogenic predicate""" + + ONCOGENIC = "isOncogenicFor" + PROTECTIVE = "isProtectiveFor" + PREDISPOSING = "isPredisposingFor" + + +class PrognosticPredicate(str, Enum): + """Define constraints for prognostic predicate""" + + BETTER_OUTCOME = "associatedWithBetterOutcomeFor" + WORSE_OUTCOME = "associatedWithWorseOutcomeFor" + + +class TherapeuticResponsePredicate(str, Enum): + """Define constraints for therapeutic response predicate""" + + SENSITIVITY = "predictsSensitivityTo" + RESISTANCE = "predictsResistanceTo" + + +class AllelePrevalenceQualifier(str, Enum): + """Reports whether the statement should be interpreted in the context of the variant + being rare or common. + """ + + RARE = "rare" + COMMON = "common" + + +class VariantDiagnosticStudyStatement(StatementBase): + """A Statement reporting a conclusion from a single study about whether a variant is + associated with a disease (a diagnostic inclusion criterion), or absence of a + disease (diagnostic exclusion criterion) - based on interpretation of the study's + results. + """ + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["VariantDiagnosticStudyStatement"] = Field( + "VariantDiagnosticStudyStatement", + description="MUST be 'VariantDiagnosticStudyStatement'.", + ) + subjectVariant: Variation | CategoricalVariant | IRI = Field( + ..., description="A variant that is the subject of the Statement." + ) + predicate: DiagnosticPredicate = Field( + ..., + description="The relationship declared to hold between the subject and the object of the Statement.", + ) + objectCondition: Condition | IRI = Field( + ..., description="The disease that is evaluated for diagnosis." + ) + alleleOriginQualifier: AlleleOriginQualifier | None = Field( + None, + description="Reports whether the statement should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or both (combined).", + ) + allelePrevalenceQualifier: AllelePrevalenceQualifier | None = Field( + None, + description="Reports whether the statement should be interpreted in the context of the variant being rare or common.", + ) + geneContextQualifier: Gene | IRI | None = Field( + None, + description="Reports a gene impacted by the variant, which may contribute to the diagnostic association in the Statement.", + ) + + +class VariantOncogenicityStudyStatement(StatementBase): + """A Statement reporting a conclusion from a single study that supports or refutes a + variant's effect on oncogenesis for a specific tumor type - based on interpretation + of the study's results. + """ + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["VariantOncogenicityStudyStatement"] = Field( + "VariantOncogenicityStudyStatement", + description="MUST be 'VariantOncogenicityStudyStatement'.", + ) + subjectVariant: Variation | CategoricalVariant | IRI = Field( + ..., description="A variant that is the subject of the Statement." + ) + predicate: OncogenicPredicate = Field( + ..., + description="The relationship declared to hold between the subject and the object of the Statement.", + ) + objectTumorType: Condition | IRI = Field( + ..., description="The tumor type for which the variant impact is evaluated." + ) + alleleOriginQualifier: AlleleOriginQualifier | None = Field( + None, + description="Reports whether the statement should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or both (combined).", + ) + allelePrevalenceQualifier: AllelePrevalenceQualifier | None = Field( + None, + description="Reports whether the statement should be interpreted in the context of the variant being rare or common.", + ) + geneContextQualifier: Gene | IRI | None = Field( + None, + description="Reports a gene impacted by the variant, which may contribute to the oncogenic role in the Statement.", + ) + + +class VariantPrognosticStudyStatement(StatementBase): + """A Statement reporting a conclusion from a single study about whether a variant is + associated with an improved or worse outcome for a disease - based on interpretation + of the study's results. + """ + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["VariantPrognosticStudyStatement"] = Field( + "VariantPrognosticStudyStatement", + description="MUST be 'VariantPrognosticStudyStatement'.", + ) + subjectVariant: Variation | CategoricalVariant | IRI = Field( + ..., description="A variant that is the subject of the Statement." + ) + predicate: PrognosticPredicate = Field( + ..., + description="The relationship declared to hold between the subject and the object of the Statement.", + ) + objectCondition: Condition | IRI = Field( + ..., description="The disease that is evaluated for outcome." + ) + alleleOriginQualifier: AlleleOriginQualifier | None = Field( + None, + description="Reports whether the statement should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or both (combined).", + ) + allelePrevalenceQualifier: AllelePrevalenceQualifier | None = Field( + None, + description="Reports whether the statement should be interpreted in the context of the variant being rare or common.", + ) + geneContextQualifier: Gene | IRI | None = Field( + None, + description="Reports a gene impacted by the variant, which may contribute to the prognostic association in the Statement.", + ) + + +class VariantTherapeuticResponseStudyStatement(StatementBase): + """A Statement reporting a conclusion from a single study about the role of a + variant in modulating the response of a neoplasm to drug administration or other + therapeutic procedures - based on interpretation of the study's results. + """ + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["VariantTherapeuticResponseStudyStatement"] = Field( + "VariantTherapeuticResponseStudyStatement", + description="MUST be 'VariantTherapeuticResponseStudyStatement'.", + ) + subjectVariant: Variation | CategoricalVariant | IRI = Field( + ..., description="A variant that is the subject of the Statement." + ) + predicate: TherapeuticResponsePredicate = Field( + ..., + description="The relationship declared to hold between the subject and the object of the Statement.", + ) + objectTherapeutic: TherapeuticProcedure | IRI = Field( + ..., + description="A drug administration or other therapeutic procedure that the neoplasm is intended to respond to.", + ) + conditionQualifier: Condition | IRI = Field( + ..., + description="Reports the disease context in which the variant's association with therapeutic sensitivity or resistance is evaluated. Note that this is a required qualifier in therapeutic response statements.", + ) + alleleOriginQualifier: AlleleOriginQualifier | None = Field( + None, + description="Reports whether the statement should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or both (combined).", + ) + allelePrevalenceQualifier: AllelePrevalenceQualifier | None = Field( + None, + description="Reports whether the statement should be interpreted in the context of the variant being rare or common.", + ) + geneContextQualifier: Gene | IRI | None = Field( + None, + description="Reports a gene impacted by the variant, which may contribute to the therapeutic sensitivity or resistance reported in the Statement. ", + ) diff --git a/submodules/va_spec b/submodules/va_spec new file mode 160000 index 0000000..3261ad7 --- /dev/null +++ b/submodules/va_spec @@ -0,0 +1 @@ +Subproject commit 3261ad79b0c6d03786ae8f14287f1926dc5b45bd diff --git a/tests/validation/test_va_spec_schema.py b/tests/validation/test_va_spec_schema.py new file mode 100644 index 0000000..1d99d76 --- /dev/null +++ b/tests/validation/test_va_spec_schema.py @@ -0,0 +1,79 @@ +"""Test that VA-Spec Python model structures match VA-Spec Schema""" + +import json +from pathlib import Path +from typing import Literal, get_args, get_origin + +import ga4gh.va_spec.profiles as va_spec_profiles + +ROOT_DIR = Path(__file__).parents[2] +VA_SPEC_SCHEMA_DIR = ( + ROOT_DIR / "submodules" / "va_spec" / "schema" / "profiles" / "json" +) +VA_SPEC_SCHEMA = {} + +VA_SPEC_BASE_CLASSES = set() +VA_SPEC_CONCRETE_CLASSES = set() +VA_SPEC_PRIMITIVES = set() + + +# Get profile classes +for f in VA_SPEC_SCHEMA_DIR.glob("*"): + with f.open() as rf: + cls_def = json.load(rf) + + va_spec_class = cls_def["title"] + VA_SPEC_SCHEMA[va_spec_class] = cls_def + + if "properties" in cls_def: + VA_SPEC_CONCRETE_CLASSES.add(va_spec_class) + elif cls_def.get("type") in {"array", "integer", "string"}: + VA_SPEC_PRIMITIVES.add(va_spec_class) + else: + VA_SPEC_BASE_CLASSES.add(va_spec_class) + + +def test_schema_models_in_pydantic(): + """Ensure that each schema model has corresponding Pydantic model""" + for va_spec_class in ( + VA_SPEC_BASE_CLASSES | VA_SPEC_CONCRETE_CLASSES | VA_SPEC_PRIMITIVES + ): + assert getattr(va_spec_profiles, va_spec_class, False), va_spec_class + + +def test_schema_class_fields(): + """Check that each schema model properties exist and are required in corresponding + Pydantic model, and validate required properties + """ + for va_spec_class in VA_SPEC_CONCRETE_CLASSES: + schema_properties = VA_SPEC_SCHEMA[va_spec_class]["properties"] + pydantic_model = getattr(va_spec_profiles, va_spec_class) + assert set(pydantic_model.model_fields) == set(schema_properties), va_spec_class + + required_schema_fields = set(VA_SPEC_SCHEMA[va_spec_class]["required"]) + + for prop, property_def in schema_properties.items(): + pydantic_model_field_info = pydantic_model.model_fields[prop] + pydantic_field_required = pydantic_model_field_info.is_required() + + if prop in required_schema_fields: + if prop != "type": + if get_origin(pydantic_model_field_info.annotation) is Literal: + assert ( + get_args(pydantic_model_field_info.annotation)[0] + == pydantic_model_field_info.default + ) + else: + assert pydantic_field_required, f"{pydantic_model}.{prop}" + else: + assert not pydantic_field_required, f"{pydantic_model}.{prop}" + + if property_def.get("description") is not None: + field_descr = pydantic_model_field_info.description or "" + assert property_def["description"].replace( + "'", '"' + ) == field_descr.replace("'", '"'), f"{pydantic_model}.{prop}" + else: + assert ( + pydantic_model_field_info.description is None + ), f"{pydantic_model}.{prop}"