diff --git a/src/tests/data_tests/test_1_annotation.py b/src/tests/data_tests/test_1_annotation.py
index d9f3ae9..5044adb 100644
--- a/src/tests/data_tests/test_1_annotation.py
+++ b/src/tests/data_tests/test_1_annotation.py
@@ -1,3 +1,6 @@
+import pytest
+import requests
+
 from biothings.tests.web import BiothingsDataTest
 
 
@@ -177,6 +180,76 @@ def test_112(self):
     def test_113(self):
         self.request("gene/", expect=400)
 
+    @pytest.mark.xfail(
+        reason="CURIE ID SUPPORT NOT CURRENTLY ENABLED ON MYGENE.INFO HOST",
+        run=True,
+        strict=True
+    )
+    def test_114(self):
+        """
+        Tests the annotation endpoint support for the biolink CURIE ID.
+
+        If support is enabled then we should retrieve the exact same document
+        for all the provided queries
+
+        A mirror copy of the tests we have in the biothings_client
+        package (gene.py)
+        """
+        curie_id_testing_collection = [
+            ("1017", "entrezgene:1017", "NCBIgene:1017"),
+            (1017, "entrezgene:1017", "ncbigene:1017"),
+            ("1017", "entrezgene:1017", "NCBIGENE:1017"),
+            ("1018", "ensembl.gene:ENSG00000250506", "ENSEMBL:ENSG00000250506"),
+            (1018, "ensembl.gene:ENSG00000250506", "ensembl:ENSG00000250506"),
+            ("5995", "uniprot.Swiss-Prot:P47804", "UniProtKB:P47804"),
+            (5995, "uniprot.Swiss-Prot:P47804", "UNIPROTKB:P47804"),
+            ("5995", "uniprot.Swiss-Prot:P47804", "uniprotkb:P47804"),
+        ]
+
+        results_aggregation = []
+        endpoint = "gene"
+        for id_query, biothings_query, biolink_query in curie_id_testing_collection:
+            id_query_result = self.request(f"{endpoint}/{id_query}", expect=200)
+            assert isinstance(id_query_result, requests.models.Response)
+            assert id_query_result.url == self.get_url(path=f"{endpoint}/{id_query}")
+
+            biothings_term_query_result = self.request(
+                f"{endpoint}/{biothings_query}", expect=200
+            )
+            assert isinstance(biothings_term_query_result, requests.models.Response)
+            assert biothings_term_query_result.url == self.get_url(
+                path=f"{endpoint}/{biothings_query}"
+            )
+
+            biolink_term_query_result = self.request(
+                f"{endpoint}/{biolink_query}", expect=200
+            )
+            assert isinstance(biolink_term_query_result, requests.models.Response)
+            assert biolink_term_query_result.url == self.get_url(
+                path=f"{endpoint}/{biolink_query}"
+            )
+
+            results_aggregation.append(
+                (
+                    id_query_result.json() == biothings_term_query_result.json(),
+                    id_query_result.json() == biolink_term_query_result.json(),
+                    biothings_term_query_result.json()
+                    == biolink_term_query_result.json(),
+                )
+            )
+
+        results_validation = []
+        failure_messages = []
+        for result, test_query in zip(results_aggregation, curie_id_testing_collection):
+            cumulative_result = all(result)
+            if not cumulative_result:
+                failure_messages.append(
+                    f"Query Failure: {test_query} | Results: {result}"
+                )
+            results_validation.append(cumulative_result)
+
+        assert all(results_validation), "\n".join(failure_messages)
+
 
 class TestAnnotationPOST(BiothingsDataTest):
     host = "mygene.info"
@@ -264,3 +337,84 @@ def test_155(self):
         # get retired gene
         res = self.request("gene", method="POST", data={"ids": "791256"}).json()
         assert res[0]["_id"] == "50846"  # this is the corresponding _id field
+
+    @pytest.mark.xfail(
+        reason="CURIE ID SUPPORT NOT CURRENTLY ENABLED ON MYGENE.INFO HOST",
+        run=True,
+        strict=True
+    )
+    def test_156(self):
+        """
+        Tests the annotations endpoint support for the biolink CURIE ID.
+
+        Batch query testing against the POST endpoint to verify that the CURIE ID can work with
+        multiple ids submitted in a single request
+
+        If support is enabled then we should retrieve the exact same document for all the provided
+        queries
+
+        A mirror copy of the tests we have in the biothings_client
+        package (gene.py)
+        """
+        curie_id_testing_collection = [
+            ("1017", "entrezgene:1017", "NCBIgene:1017"),
+            (1017, "entrezgene:1017", "ncbigene:1017"),
+            ("1017", "entrezgene:1017", "NCBIGENE:1017"),
+            ("1018", "ensembl.gene:ENSG00000250506", "ENSEMBL:ENSG00000250506"),
+            (1018, "ensembl.gene:ENSG00000250506", "ensembl:ENSG00000250506"),
+            ("5995", "uniprot.Swiss-Prot:P47804", "UniProtKB:P47804"),
+            (5995, "uniprot.Swiss-Prot:P47804", "UNIPROTKB:P47804"),
+            ("5995", "uniprot.Swiss-Prot:P47804", "uniprotkb:P47804"),
+        ]
+
+        results_aggregation = []
+        endpoint = "gene"
+        for id_query, biothings_query, biolink_query in curie_id_testing_collection:
+            base_result = self.request(f"{endpoint}/{id_query}", expect=200)
+
+            query_collection = (id_query, biothings_query, biolink_query)
+            delimiter = ","
+            data_mapping = {
+                "ids": delimiter.join([f'"{query}"' for query in query_collection])
+            }
+            header_mapping = {
+                "user-agent": "biothings_client.py/0.3.1 (python:3.11.2 requests:2.31.0)"
+            }
+            query_results = self.request(
+                endpoint, method="POST", data=data_mapping, headers=header_mapping
+            ).json()
+            assert len(query_results) == len(query_collection)
+
+            batch_id_query = query_results[0]
+            batch_biothings_query = query_results[1]
+            batch_biolink_query = query_results[2]
+
+            batch_id_query_return_value = batch_id_query.pop("query")
+            assert batch_id_query_return_value == str(id_query)
+
+            batch_biothings_query_return_value = batch_biothings_query.pop("query")
+            assert batch_biothings_query_return_value == str(biothings_query)
+
+            batch_biolink_query_return_value = batch_biolink_query.pop("query")
+            assert batch_biolink_query_return_value == str(biolink_query)
+
+            # Decode the GET response once; it is compared against every batch hit
+            base_document = base_result.json()
+            batch_result = (
+                base_document == batch_id_query,
+                base_document == batch_biothings_query,
+                base_document == batch_biolink_query,
+            )
+            results_aggregation.append(batch_result)
+
+        results_validation = []
+        failure_messages = []
+        for result, test_query in zip(results_aggregation, curie_id_testing_collection):
+            cumulative_result = all(result)
+            if not cumulative_result:
+                failure_messages.append(
+                    f"Query Failure: {test_query} | Results: {result}"
+                )
+            results_validation.append(cumulative_result)
+
+        assert all(results_validation), "\n".join(failure_messages)