-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a91957f
commit c8b9e3e
Showing
2 changed files
with
238 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import json | ||
from dcicutils.ff_utils import get_metadata, get_health_page, search_metadata | ||
from dcicutils.creds_utils import CGAPKeyManager | ||
|
||
|
||
|
||
class VariantUtils:
    """Helpers for searching VariantSample items on a portal and summarizing them per gene.

    Credentials for the requested environment are looked up once, at construction
    time, through ``CGAPKeyManager``; every search method reuses them.
    """

    # Search path returning a single VariantSample whose most severe gene matches;
    # the gene symbol is appended to the end of the string.
    SEARCH_VARIANTS_BY_GENE = '/search/?type=VariantSample&limit=1&variant.genes.genes_most_severe_gene.display_title='
    # Search path filtered on samplegeno role "proband" and on
    # variant.csq_gnomadg_af_popmax between 0 and 0.001; the gene symbol is appended.
    SEARCH_RARE_VARIANTS_BY_GENE = '/search/?samplegeno.samplegeno_role=proband&type=VariantSample&variant.csq_gnomadg_af_popmax.from=0&variant.csq_gnomadg_af_popmax.to=0.001&variant.genes.genes_most_severe_gene.display_title='

    def __init__(self, *, env_name) -> None:
        """Load the keydict for ``env_name`` and remember its server as the base URL.

        :param env_name: name of the environment whose credentials should be used.
        """
        self._key_manager = CGAPKeyManager()
        self.creds = self._key_manager.get_keydict_for_env(env=env_name)
        # Uncomment this if needed
        # self.health = get_health_page(key=self.creds)
        self.base_url = self.creds['server']

    def get_creds(self):
        """Return the keydict obtained from the key manager in ``__init__``."""
        return self.creds

    # Uncomment this if needed
    # def get_health(self):
    #     return self.health

    def get_rare_variants_by_gene(self, *, gene, sort, addon=''):
        """Search for rare variants on a particular gene.

        :param gene: gene symbol appended to the rare-variant search query.
        :param sort: field name to sort on; it is sent as ``sort=-<field>``.
        :param addon: extra query-string text appended verbatim after the sort.
        :return: whatever ``search_metadata`` returns for the query.
        """
        # NOTE(review): SEARCH_RARE_VARIANTS_BY_GENE already starts with '/', so the
        # string built here contains '//' right after base_url, and base_url is passed
        # in addition to ``key``. Confirm that search_metadata expects a full URL
        # rather than a bare search path.
        return search_metadata(f'{self.base_url}/{self.SEARCH_RARE_VARIANTS_BY_GENE}{gene}&sort=-{sort}{addon}', key=self.creds)

    def find_number_of_sample_ids(self, gene):
        """Return the number of distinct ``CALL_INFO`` values among the gene's rare variants.

        Results lacking a ``CALL_INFO`` key all count together as one value (``None``).
        """
        variants = self.get_rare_variants_by_gene(gene=gene, sort='variant.ID')
        return len({variant.get('CALL_INFO') for variant in variants})

    def get_total_result_count_from_search(self, gene):
        """Return the ``total`` field of the variant search response for ``gene``."""
        res = get_metadata(self.SEARCH_VARIANTS_BY_GENE + gene, key=self.creds)
        return res['total']

    @staticmethod
    def sort_dict_in_descending_order(unsorted_dict):
        """Return a new dict with the same items ordered by value, largest first.

        The sort is stable, so items with equal values keep their original order.
        """
        sorted_list = sorted(unsorted_dict.items(), key=lambda item: item[1], reverse=True)
        return dict(sorted_list)

    def create_dict_of_mutations(self, gene):
        """Count the gene's rare variants per position, keeping positions seen 10+ times.

        :param gene: gene symbol to search for.
        :return: ``{gene: {position: count, ...}}`` ordered by descending count.
        """
        mutation_dict = {}
        for variant in self.get_rare_variants_by_gene(gene=gene, sort='variant.ID'):
            pos = variant['variant']['POS']
            mutation_dict[pos] = mutation_dict.get(pos, 0) + 1
        frequent = {pos: count for pos, count in mutation_dict.items() if count >= 10}
        return {gene: self.sort_dict_in_descending_order(frequent)}

    @staticmethod
    def return_json(file_name):
        """Parse the JSON file at ``file_name`` and return the resulting object."""
        with open(file_name, 'r') as f:
            # json.load reads from the open file; json.loads only accepts str/bytes.
            file_content = json.load(f)
        return file_content

    @staticmethod
    def create_dict_from_json_file(file_name):
        """Parse the JSON file at ``file_name`` and return the resulting object."""
        with open(file_name) as f:
            return json.load(f)

    def _genes_with_summary_keywords(self, keywords):
        """Return symbols of genes in 'gene.json' whose summary contains any keyword.

        Matching is a case-sensitive substring test; genes without a
        ``gene_summary`` are treated as having an empty summary.
        """
        genes = self.return_json('gene.json')
        return [gene['gene_symbol'] for gene in genes
                if any(keyword in gene.get('gene_summary', '') for keyword in keywords)]

    def create_list_of_msa_genes(self):
        """Return symbols of genes whose summary contains 'nerv' or 'neur'."""
        return self._genes_with_summary_keywords(('nerv', 'neur'))

    def create_url(self, gene):
        """Return the search path for rare variants at the gene's first listed position.

        Positions are read from '10+sorted_msa_genes_and_mutations.json'; the first
        key stored under ``gene`` is used for both ends of the position range.

        :raises KeyError: if ``gene`` is not present in the file.
        :raises IndexError: if no positions are stored for ``gene``.
        """
        d = self.create_dict_from_json_file('10+sorted_msa_genes_and_mutations.json')
        pos = list(d[gene])[0]
        # The f-prefix is required so that the position is interpolated into the query.
        return self.SEARCH_RARE_VARIANTS_BY_GENE + gene + f'&variant.POS.from={pos}&variant.POS.to={pos}&sort=-DP'

    def create_list_of_als_park_genes(self):
        """Return symbols of genes whose summary contains 'Parkinson' or 'ALS'."""
        return self._genes_with_summary_keywords(('Parkinson', 'ALS'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
import pytest | ||
from unittest import mock | ||
from contextlib import contextmanager | ||
from dcicutils import variant_utils | ||
from dcicutils.variant_utils import VariantUtils | ||
from unittest.mock import patch, mock_open | ||
|
||
|
||
def create_dummy_keydict():
    """Build a fresh fake keydict with a single 'cgap-dummy' entry for use in tests."""
    dummy_entry = dict(key='dummy', secret='dummy', server='cgap-test.com')
    return {'cgap-dummy': dummy_entry}
|
||
|
||
class TestVariantUtils:
    """Unit tests for VariantUtils with the key manager and all portal calls mocked."""

    # Single environment name used by every test so that construction is uniform.
    ENV_NAME = 'cgap-dummy'

    class CGAPKeyManager:
        """Stand-in key manager that returns the dummy keydict for any environment."""

        def get_keydict_for_env(self, *, env):
            return create_dummy_keydict()['cgap-dummy']

    @contextmanager
    def mock_key_manager(self):
        """Replace the CGAPKeyManager used by variant_utils with the stand-in above."""
        with mock.patch.object(variant_utils, 'CGAPKeyManager', new=self.CGAPKeyManager):
            yield

    def _make_variant_utils(self):
        """Build a VariantUtils instance backed by the dummy credentials."""
        with self.mock_key_manager():
            return VariantUtils(env_name=self.ENV_NAME)

    def test_variant_utils_basic(self):
        """ Tests the instantiation of a VariantUtils object """
        vu = self._make_variant_utils()
        expected_creds = create_dummy_keydict()['cgap-dummy']
        assert vu.get_creds() == expected_creds
        assert vu.base_url == expected_creds['server']

    @pytest.mark.parametrize('total_value', [
        100,
        200,
        300,
        400
    ])
    @patch('dcicutils.variant_utils.get_metadata')
    def test_get_total_result_count_from_search(self, mock_get_metadata, total_value):
        """The method returns the 'total' field of the search response."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        mock_get_metadata.return_value = {'total': total_value}
        result = vu.get_total_result_count_from_search(mock_gene)
        assert result == total_value
        mock_get_metadata.assert_called_once_with(f'/search/?type=VariantSample&limit=1&variant.genes.genes_most_severe_gene.display_title={mock_gene}', key=vu.creds)

    @pytest.mark.parametrize('occurrences', [8, 9, 10, 11])
    @patch('dcicutils.variant_utils.VariantUtils.get_rare_variants_by_gene')
    def test_create_dict_of_mutations(self, mock_get_rare_variants_by_gene, occurrences):
        """Positions are reported only once they occur at least 10 times."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        mock_get_rare_variants_by_gene.return_value = [{'variant': {'POS': 100000}}] * occurrences
        result = vu.create_dict_of_mutations(mock_gene)
        if occurrences >= 10:
            expected_result = {mock_gene: {100000: occurrences}}
        else:
            expected_result = {mock_gene: {}}
        assert result == expected_result
        mock_get_rare_variants_by_gene.assert_called_once_with(gene=mock_gene, sort='variant.ID')

    @patch('dcicutils.variant_utils.VariantUtils.return_json')
    def test_create_list_of_msa_genes(self, mock_return_json):
        """Only genes whose summary contains 'nerv' or 'neur' are listed."""
        vu = self._make_variant_utils()
        mock_return_json.return_value = [
            {'gene_symbol': 'GENE1', 'gene_summary': '...nerv...'},
            {'gene_symbol': 'GENE2', 'gene_summary': '..........'},
            {'gene_symbol': 'GENE3', 'gene_summary': '...neur...'}
        ]
        result = vu.create_list_of_msa_genes()
        assert result == ['GENE1', 'GENE3']
        mock_return_json.assert_called_once_with('gene.json')

    @patch('dcicutils.variant_utils.VariantUtils.get_rare_variants_by_gene')
    def test_find_number_of_sample_ids(self, mock_get_rare_variants_by_gene):
        """Duplicate CALL_INFO values are counted once."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        mock_get_rare_variants_by_gene.return_value = [
            {'CALL_INFO': 'ABC123'},
            {'CALL_INFO': 'ABC123'},
            {'CALL_INFO': 'BCD234'},
            {'CALL_INFO': 'CDE345'}
        ]
        result = vu.find_number_of_sample_ids(mock_gene)
        assert result == 3
        mock_get_rare_variants_by_gene.assert_called_once_with(gene=mock_gene, sort='variant.ID')

    @pytest.mark.parametrize('pos', [
        '100000',
        '200000',
        '300000',
        '400000'
    ])
    @patch('dcicutils.variant_utils.VariantUtils.create_dict_from_json_file')
    def test_create_url(self, mock_create_dict_from_json_file, pos):
        """The URL targets the first position listed for the requested gene."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        mock_create_dict_from_json_file.return_value = {
            'GENE': {pos: 20, '123456': 10},
            'OTHER_GENE': {pos: 10}
        }
        result = vu.create_url(gene=mock_gene)
        # The expectation must be an f-string: the URL has to carry the actual
        # position, not the literal text '{pos}'.
        expected_result = vu.SEARCH_RARE_VARIANTS_BY_GENE + mock_gene + f'&variant.POS.from={pos}&variant.POS.to={pos}&sort=-DP'
        assert result == expected_result
        mock_create_dict_from_json_file.assert_called_once_with('10+sorted_msa_genes_and_mutations.json')

    @patch('dcicutils.variant_utils.VariantUtils.return_json')
    def test_create_list_of_als_park_genes(self, mock_return_json):
        """Only genes whose summary contains 'Parkinson' or 'ALS' are listed."""
        vu = self._make_variant_utils()
        mock_return_json.return_value = [
            {'gene_symbol': 'GENE1', 'gene_summary': '...Parkinson...'},
            {'gene_symbol': 'GENE2', 'gene_summary': '...............'},
            {'gene_symbol': 'GENE3', 'gene_summary': '.....ALS.......'}
        ]
        result = vu.create_list_of_als_park_genes()
        assert result == ['GENE1', 'GENE3']
        mock_return_json.assert_called_once_with('gene.json')