# Disable default enum validation for amr_genes
def ignore_enum(validator, enums, instance, schema):
    """No-op stand-in for the standard ``enum`` keyword check.

    Uses the jsonschema keyword-callable signature and reports no errors.
    It is not registered in ``available`` below.

    Args:
        validator: The running jsonschema validator (unused).
        enums: The value of the ``enum`` keyword in the schema (unused).
        instance: The instance being validated (unused).
        schema: The schema containing the keyword (unused).

    Returns:
        None, which means no validation errors are produced.
    """
    return None


def validate_amr_genes(validator, value, instance, schema):
    """Yield an error for every AMR gene that is missing from the gene table.

    The gene table is the JSON object stored in
    ``relecov_tools/conf/amr_genes.json``; its keys are the known gene names.

    Args:
        validator: The running jsonschema validator (unused).
        value: The value of the custom keyword in the schema (unused).
        instance: The instance being validated. Only mappings are inspected;
            the genes are read from its ``"amr_genes"`` entry.
        schema: The schema containing the keyword (unused).

    Yields:
        ValidationError: One per gene that is not a key of the gene table.

    Raises:
        OSError: If the gene table cannot be opened.
        json.JSONDecodeError: If the gene table is not valid JSON.
    """
    # A keyword callable receives whatever instance sits at the schema
    # location, so anything that is not a mapping has no genes to check.
    if not isinstance(instance, dict):
        return

    amr_genes = instance.get("amr_genes", [])
    if not amr_genes:
        # Nothing to look up: skip the file read entirely.
        return

    # This module lives in relecov_tools/assets/schema_utils, while the table
    # is shipped in relecov_tools/conf, i.e. two directories up.
    amr_config = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        os.pardir,
        os.pardir,
        "conf",
        "amr_genes.json",
    )
    with open(amr_config, "r", encoding="utf-8") as file:
        known_genes = json.load(file)

    for gene in amr_genes:
        if gene not in known_genes:
            yield ValidationError(f"Gene '{gene}' is not annotated in any group.")


# Map of custom validators
# NOTE(review): jsonschema dispatches keyword callables by the schema keyword
# name, so a schema has to contain the exact key used here. The schema change
# that accompanies this module uses "amr_genes_validation" and a property
# named "tmp-amr_genes" -- confirm which names are intended.
available = {"amr_genes_validator": validate_amr_genes}
def register_custom_validators(self):
    """Register custom JSON schema validators based on configuration.

    Reads the list stored under the ``json_schema_validators`` key of the
    package configuration and, for each listed name, installs the matching
    callable from ``custom_validators.available`` into
    ``Draft202012Validator.VALIDATORS``.

    Returns:
        None. Returns early, after printing a notice, when the key yields
        ``None`` or an empty list.

    Raises:
        SystemExit: With status 1 when a configured name has no entry in
            ``custom_validators.available``.
    """
    # Get custom validators utils
    stderr.print("Loading configuration for custom JSON schema validators.")
    conf = ConfigJson()
    # Only used to make the notices below point at the file to edit.
    conf_path = os.path.join(
        os.path.dirname(__file__), "conf", "configuration.json"
    )

    # The key must match the one defined in configuration.json exactly;
    # a None result is treated below as "key not present".
    custom_validator_list = conf.get_configuration("json_schema_validators")
    if custom_validator_list is None:
        stderr.print(f"No 'json_schema_validators' key found in {conf_path}")
        return

    if not custom_validator_list:
        stderr.print(
            f"No validators defined yet under 'json_schema_validators' key in {conf_path}"
        )
        return

    # Append custom validators to Draft202012Validator
    # NOTE(review): this mutates the shared class-level mapping, so the
    # registration is visible to every user of Draft202012Validator in the
    # process -- confirm that is intended (jsonschema.validators.extend
    # builds a separate validator class instead).
    stderr.print("Registering custom validators.")
    available_validators = (
        relecov_tools.assets.schema_utils.custom_validators.available
    )
    for validator_name in custom_validator_list:
        if validator_name not in available_validators:
            stderr.print(f"No validator found for '{validator_name}'. Exiting.")
            sys.exit(1)
        Draft202012Validator.VALIDATORS[validator_name] = available_validators[
            validator_name
        ]
        stderr.print(f"Custom validator '{validator_name}' successfully added.")