Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance json schema validation with custom validators #272

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions relecov_tools/assets/schema_utils/custom_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import os
import json
from jsonschema import ValidationError


# TODO: Add a usage doc here.


# Disable default enum validation for amr_genes
def ignore_enum(validator, enums, instance, schema):
    """Accept any instance, whatever the schema's ``enum`` lists.

    Uses the jsonschema keyword-callable signature so it can stand in for
    the built-in ``enum`` check. All four arguments are ignored and no
    validation error is ever produced.
    """
    return None


def validate_amr_genes(validator, value, instance, schema):
    """Yield an error for each AMR gene that is missing from the gene catalogue.

    Uses the jsonschema keyword-callable signature.

    Args:
        validator: Validator invoking this keyword (unused).
        value: Value attached to the keyword in the schema (unused).
        instance: Data under validation; its "amr_genes" entry is checked.
        schema: Schema holding the keyword (unused).

    Yields:
        ValidationError: One per gene that is not a key of the catalogue.

    Raises:
        OSError: If the catalogue file cannot be opened.
        json.JSONDecodeError: If the catalogue file is not valid JSON.
    """
    # Keyword callables are expected to ignore instances they do not apply
    # to; only objects can carry an "amr_genes" entry.
    if not isinstance(instance, dict):
        return

    # A missing or null entry means there is nothing to check.
    amr_genes = instance.get("amr_genes") or []
    if not amr_genes:
        return

    # Load Config from File. The catalogue ships as
    # relecov_tools/conf/amr_genes.json, while this module lives in
    # relecov_tools/assets/schema_utils/, so climb two levels to the package.
    package_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    amr_config = os.path.join(package_dir, "conf", "amr_genes.json")
    with open(amr_config, "r", encoding="utf-8") as file:
        amr_json = json.load(file)

    for gene in amr_genes:
        if gene not in amr_json:
            yield ValidationError(f"Gene '{gene}' is not annotated in any group.")


# Registry of custom validators: schema keyword name -> keyword callable.
# Names listed under "json_schema_validators" in configuration.json are
# looked up here.
# NOTE(review): relecov_schema.json currently declares the keyword
# "amr_genes_validation", which does not match the key below - confirm
# which name is intended, otherwise the callable is never triggered.
available = {
    "amr_genes_validator": validate_amr_genes,
}
12 changes: 12 additions & 0 deletions relecov_tools/conf/amr_genes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"geneA": "group1",
"geneB": "group2",
"geneC": "group3",
"geneD": "group1",
"geneE": "group2",
"geneF": "group3",
"geneG": "group1",
"geneH": "group2",
"geneI": "group3",
"geneJ": "group1"
}
5 changes: 4 additions & 1 deletion relecov_tools/conf/configuration.json
Original file line number Diff line number Diff line change
Expand Up @@ -400,5 +400,8 @@
"fastq_r2_md5"
]
}
}
},
"json_schema_validators": [
"amr_genes_validator"
]
}
39 changes: 39 additions & 0 deletions relecov_tools/json_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

import relecov_tools.utils
from relecov_tools.config_json import ConfigJson
import relecov_tools.assets.schema_utils.custom_validators


log = logging.getLogger(__name__)
stderr = rich.console.Console(
Expand Down Expand Up @@ -64,6 +66,42 @@ def validate_schema(self):
stderr.print("[red] Json schema does not fulfill Draft 202012 Validation")
sys.exit(1)

def register_custom_validators(self):
    """Register custom JSON schema validators based on configuration.

    Reads the validator names listed under the 'json_schema_validators'
    configuration key and installs the matching callables from
    ``custom_validators.available`` into ``Draft202012Validator.VALIDATORS``.

    Returns without registering anything when the key is missing or its
    list is empty. Exits the process with status 1 when a configured name
    has no implementation in ``available``.
    """

    # Get custom validators utils
    stderr.print("Loading configuration for custom JSON schema validators.")
    conf = ConfigJson()
    # Only used to point the user at the configuration file in messages.
    conf_path = os.path.join(
        os.path.dirname(__file__), "conf", "configuration.json"
    )

    # The key must be spelled exactly as in configuration.json; the previous
    # misspelling ("json_schema_valiators") meant nothing was ever registered.
    custom_validator_list = conf.get_configuration("json_schema_validators")
    if custom_validator_list is None:
        stderr.print(f"No 'json_schema_validators' key found in {conf_path}")
        return

    if not custom_validator_list:
        stderr.print(
            f"No validators defined yet under 'json_schema_validators' key in {conf_path}"
        )
        return

    # Append custom validators to Draft202012Validator
    # NOTE(review): this mutates the class-level VALIDATORS mapping, so the
    # keywords become visible to every Draft202012Validator user in the
    # process - confirm that is intended (jsonschema.validators.extend
    # builds a separate validator class instead).
    stderr.print("Registering custom validators.")
    available_validators = (
        relecov_tools.assets.schema_utils.custom_validators.available
    )
    for validator_name in custom_validator_list:
        if validator_name not in available_validators:
            stderr.print(f"No validator found for '{validator_name}'. Exiting.")
            sys.exit(1)
        Draft202012Validator.VALIDATORS[validator_name] = available_validators[
            validator_name
        ]
        stderr.print(f"Custom validator '{validator_name}' successfully added.")

def validate_instances(self):
"""Validate data instances against a validated json schema"""

Expand Down Expand Up @@ -189,5 +227,6 @@ def validate(self):
"""Write invalid samples from metadata to excel"""

self.validate_schema()
self.register_custom_validators()
invalid_json = self.validate_instances()
self.create_invalid_metadata(invalid_json, self.metadata, self.out_folder)
15 changes: 14 additions & 1 deletion relecov_tools/schema/relecov_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
"sequencing_instrument_model",
"sequencing_instrument_platform",
"enrichment_panel",
"enrichment_panel_version"
"enrichment_panel_version",
"amr_genes"
],
"type": "object",
"properties": {
Expand Down Expand Up @@ -2899,6 +2900,18 @@
"description": "",
"clasification": "Submission ENA",
"label": "Run Alias"
},
"tmp-amr_genes": { "$ref": "#/$defs/gene_name"}
},
"amr_genes_validation": true,
"$defs": {
"gene_name" : {
"type": "array",
"items": {
"type": "string",
"enum": ["geneA", "geneB", "geneC", "geneD", "geneE", "geneF", "geneG", "geneH", "geneI", "geneJ"],
"uniqueItems": true
}
}
}
}
Loading