Skip to content

Commit

Permalink
first draft of variantutils
Browse files Browse the repository at this point in the history
  • Loading branch information
TomDuraisingh committed Aug 4, 2023
1 parent a91957f commit c8b9e3e
Show file tree
Hide file tree
Showing 2 changed files with 238 additions and 0 deletions.
99 changes: 99 additions & 0 deletions dcicutils/variant_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import json
from dcicutils.ff_utils import get_metadata, get_health_page, search_metadata
from dcicutils.creds_utils import CGAPKeyManager



class VariantUtils:
    """Helpers for running VariantSample searches, grouped by gene, against a CGAP server.

    Credentials are looked up through ``CGAPKeyManager`` for the environment
    name given at construction time and are passed as ``key`` to the
    ``ff_utils`` calls made by the methods below.
    """

    # Search path for VariantSample items on a gene, limited to one result;
    # the gene's display title is appended directly to the end.
    SEARCH_VARIANTS_BY_GENE = (
        '/search/?type=VariantSample&limit=1'
        '&variant.genes.genes_most_severe_gene.display_title='
    )
    # Search path for proband VariantSample items whose csq_gnomadg_af_popmax
    # lies between 0 and 0.001; the gene's display title is appended directly.
    SEARCH_RARE_VARIANTS_BY_GENE = (
        '/search/?samplegeno.samplegeno_role=proband&type=VariantSample'
        '&variant.csq_gnomadg_af_popmax.from=0&variant.csq_gnomadg_af_popmax.to=0.001'
        '&variant.genes.genes_most_severe_gene.display_title='
    )

    def __init__(self, *, env_name) -> None:
        """Fetch the key dictionary for ``env_name`` and remember its server.

        :param env_name: environment name handed to
            ``CGAPKeyManager.get_keydict_for_env``.
        :raises KeyError: if the returned key dictionary has no ``'server'`` entry.
        """
        self._key_manager = CGAPKeyManager()
        self.creds = self._key_manager.get_keydict_for_env(env=env_name)
        # Uncomment this if needed
        # self.health = get_health_page(key=self.creds)
        self.base_url = self.creds['server']

    def get_creds(self):
        """Return the key dictionary loaded in ``__init__``."""
        return self.creds

    # Uncomment this if needed
    # def get_health(self):
    #     return self.health

    def get_rare_variants_by_gene(self, *, gene, sort, addon=''):
        """Search for rare variants on a particular gene.

        :param gene: gene display title appended to ``SEARCH_RARE_VARIANTS_BY_GENE``.
        :param sort: field name placed in the query as ``&sort=-<sort>``.
        :param addon: extra query-string text appended verbatim (default: none).
        :return: whatever ``search_metadata`` returns for the assembled query.
        """
        # NOTE(review): SEARCH_RARE_VARIANTS_BY_GENE already starts with '/',
        # so this yields '<server>//search/...'. Left unchanged because how
        # search_metadata normalises its argument is not visible here - confirm.
        search_url = f'{self.base_url}/{self.SEARCH_RARE_VARIANTS_BY_GENE}{gene}&sort=-{sort}{addon}'
        return search_metadata(search_url, key=self.creds)

    def find_number_of_sample_ids(self, gene):
        """Return the number of distinct ``CALL_INFO`` values among a gene's rare variants.

        Results without a ``CALL_INFO`` key are counted together as a single
        ``None`` value.
        """
        variants = self.get_rare_variants_by_gene(gene=gene, sort='variant.ID')
        return len({variant.get('CALL_INFO') for variant in variants})

    def get_total_result_count_from_search(self, gene):
        """Return the ``'total'`` field of the variants-by-gene search response."""
        res = get_metadata(self.SEARCH_VARIANTS_BY_GENE + gene, key=self.creds)
        return res['total']

    @staticmethod
    def sort_dict_in_descending_order(unsorted_dict):
        """Return a new dict with the same items ordered by descending value.

        Items with equal values keep their original relative order.
        """
        sorted_list = sorted(unsorted_dict.items(), key=lambda item: item[1], reverse=True)
        return dict(sorted_list)

    def create_dict_of_mutations(self, gene, *, min_count=10):
        """Count a gene's rare variants per position, keeping the frequent positions.

        :param gene: gene to search for.
        :param min_count: minimum number of variants a position needs in order
            to be kept (default 10).
        :return: ``{gene: {position: count, ...}}`` ordered by descending count.
        :raises KeyError: if a search result lacks ``['variant']['POS']``.
        """
        position_counts = {}
        for variant in self.get_rare_variants_by_gene(gene=gene, sort='variant.ID'):
            pos = variant['variant']['POS']
            position_counts[pos] = position_counts.get(pos, 0) + 1
        frequent = {pos: count for pos, count in position_counts.items() if count >= min_count}
        return {gene: self.sort_dict_in_descending_order(frequent)}

    @staticmethod
    def return_json(file_name):
        """Parse the JSON file ``file_name`` and return the resulting object."""
        with open(file_name, 'r', encoding='utf-8') as f:
            # json.load reads from a file object; json.loads only accepts
            # str/bytes and raises TypeError when handed the file itself.
            return json.load(f)

    @staticmethod
    def create_dict_from_json_file(file_name):
        """Parse the JSON file ``file_name`` and return the resulting object."""
        with open(file_name, encoding='utf-8') as f:
            return json.load(f)

    def _gene_symbols_mentioning(self, *terms):
        """Return ``gene_symbol`` of each gene.json entry whose summary contains any term.

        Matching is a case-sensitive substring test; entries without a
        ``gene_summary`` never match.
        """
        genes = self.return_json('gene.json')
        return [gene['gene_symbol'] for gene in genes
                if any(term in gene.get('gene_summary', '') for term in terms)]

    def create_list_of_msa_genes(self):
        """Create a list of genes whose summary contains 'nerv' or 'neur'.

        These substrings are used to pick out genes relating to the brain or
        nervous system.
        """
        return self._gene_symbols_mentioning('nerv', 'neur')

    def create_url(self, gene):
        """Return a search path for the variants at a gene's most commonly mutated position.

        The position is the first key stored for ``gene`` in
        '10+sorted_msa_genes_and_mutations.json'.

        :raises KeyError: if ``gene`` is not in the JSON file.
        :raises IndexError: if ``gene`` has no positions recorded.
        """
        d = self.create_dict_from_json_file('10+sorted_msa_genes_and_mutations.json')
        pos = list(d[gene])[0]
        # Must be an f-string: otherwise the literal text '{pos}' ends up in the URL.
        return self.SEARCH_RARE_VARIANTS_BY_GENE + gene + f'&variant.POS.from={pos}&variant.POS.to={pos}&sort=-DP'

    def create_list_of_als_park_genes(self):
        """Create a list of genes whose summary contains 'Parkinson' or 'ALS'."""
        return self._gene_symbols_mentioning('Parkinson', 'ALS')
139 changes: 139 additions & 0 deletions test/test_variant_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import pytest
from unittest import mock
from contextlib import contextmanager
from dcicutils import variant_utils
from dcicutils.variant_utils import VariantUtils
from unittest.mock import patch, mock_open


def create_dummy_keydict():
    """Return a fake key mapping holding one 'cgap-dummy' entry with placeholder values."""
    dummy_entry = dict(key='dummy', secret='dummy', server='cgap-test.com')
    return {'cgap-dummy': dummy_entry}


class TestVariantUtils:
    """Unit tests for VariantUtils; key lookup and portal/file access are replaced by mocks."""

    DUMMY_ENV = 'cgap-dummy'

    class CGAPKeyManager:
        """Stand-in key manager that returns the dummy key dictionary for any env."""

        def get_keydict_for_env(self, *, env):
            return create_dummy_keydict()['cgap-dummy']

    @contextmanager
    def mock_key_manager(self):
        """Swap variant_utils.CGAPKeyManager for the stand-in while the block runs."""
        with mock.patch.object(variant_utils, 'CGAPKeyManager', new=self.CGAPKeyManager):
            yield

    def _make_variant_utils(self):
        """Build a VariantUtils whose credentials come from the stand-in key manager."""
        with self.mock_key_manager():
            return VariantUtils(env_name=self.DUMMY_ENV)

    def test_variant_utils_basic(self):
        """ Tests the instantiation of a VariantUtils object """
        vu = self._make_variant_utils()
        dummy_keys = create_dummy_keydict()['cgap-dummy']
        assert vu.creds == dummy_keys
        assert vu.get_creds() == dummy_keys
        assert vu.base_url == dummy_keys['server']

    @pytest.mark.parametrize('total_value', [
        100,
        200,
        300,
        400
    ])
    @patch('dcicutils.variant_utils.get_metadata')
    def test_get_total_result_count_from_search(self, mock_get_metadata, total_value):
        """The 'total' field of the get_metadata response is returned unchanged."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        mock_get_metadata.return_value = {'total': total_value}
        result = vu.get_total_result_count_from_search(mock_gene)
        assert result == total_value
        mock_get_metadata.assert_called_once_with(
            f'/search/?type=VariantSample&limit=1&variant.genes.genes_most_severe_gene.display_title={mock_gene}',
            key=vu.creds)

    @pytest.mark.parametrize('returned_variants, expected_length', [
        ([{'variant': {'POS': 100000}}], 8),
        ([{'variant': {'POS': 100000}}], 9),
        ([{'variant': {'POS': 100000}}], 10),
        ([{'variant': {'POS': 100000}}], 11),
    ])
    @patch('dcicutils.variant_utils.VariantUtils.get_rare_variants_by_gene')
    def test_create_dict_of_mutations(self, mock_get_rare_variants_by_gene, returned_variants, expected_length):
        """A position is reported only once it occurs at least 10 times."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        # Repeat the single variant so that its position occurs expected_length times.
        mock_get_rare_variants_by_gene.return_value = (returned_variants * expected_length)
        result = vu.create_dict_of_mutations(mock_gene)
        if expected_length >= 10:
            expected_result = {mock_gene: {100000: expected_length}}
        else:
            expected_result = {mock_gene: {}}
        assert result == expected_result
        mock_get_rare_variants_by_gene.assert_called_once_with(gene=mock_gene, sort='variant.ID')

    @patch('dcicutils.variant_utils.VariantUtils.return_json')
    def test_create_list_of_msa_genes(self, mock_return_json):
        """Only genes whose summary contains 'nerv' or 'neur' are listed."""
        vu = self._make_variant_utils()
        mock_return_json.return_value = [
            {'gene_symbol': 'GENE1', 'gene_summary': '...nerv...'},
            {'gene_symbol': 'GENE2', 'gene_summary': '..........'},
            {'gene_symbol': 'GENE3', 'gene_summary': '...neur...'}
        ]
        result = vu.create_list_of_msa_genes()
        assert result == ['GENE1', 'GENE3']
        mock_return_json.assert_called_once_with('gene.json')

    @patch('dcicutils.variant_utils.VariantUtils.get_rare_variants_by_gene')
    def test_find_number_of_sample_ids(self, mock_get_rare_variants_by_gene):
        """Duplicate CALL_INFO values are counted once."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        mock_get_rare_variants_by_gene.return_value = [
            {'CALL_INFO': 'ABC123'},
            {'CALL_INFO': 'ABC123'},
            {'CALL_INFO': 'BCD234'},
            {'CALL_INFO': 'CDE345'}
        ]
        result = vu.find_number_of_sample_ids(mock_gene)
        assert result == 3
        mock_get_rare_variants_by_gene.assert_called_once_with(gene=mock_gene, sort='variant.ID')

    @pytest.mark.parametrize('pos', [
        '100000',
        '200000',
        '300000',
        '400000'
    ])
    @patch('dcicutils.variant_utils.VariantUtils.create_dict_from_json_file')
    def test_create_url(self, mock_create_dict_from_json_file, pos):
        """The URL targets the first position listed for the gene."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        mock_create_dict_from_json_file.return_value = {
            'GENE': {pos: 20, '123456': 10},
            'OTHER_GENE': {pos: 10}
        }
        result = vu.create_url(gene=mock_gene)
        # The expectation must interpolate the parametrized position; a plain
        # string would compare against the literal text '{pos}' and never
        # exercise the parameter.
        expected_result = (vu.SEARCH_RARE_VARIANTS_BY_GENE + mock_gene
                           + f'&variant.POS.from={pos}&variant.POS.to={pos}&sort=-DP')
        assert result == expected_result
        mock_create_dict_from_json_file.assert_called_once_with('10+sorted_msa_genes_and_mutations.json')

    @patch('dcicutils.variant_utils.VariantUtils.return_json')
    def test_create_list_of_als_park_genes(self, mock_return_json):
        """Only genes whose summary contains 'Parkinson' or 'ALS' are listed."""
        vu = self._make_variant_utils()
        mock_return_json.return_value = [
            {'gene_symbol': 'GENE1', 'gene_summary': '...Parkinson...'},
            {'gene_symbol': 'GENE2', 'gene_summary': '...............'},
            {'gene_symbol': 'GENE3', 'gene_summary': '.....ALS.......'}
        ]
        result = vu.create_list_of_als_park_genes()
        assert result == ['GENE1', 'GENE3']
        mock_return_json.assert_called_once_with('gene.json')

0 comments on commit c8b9e3e

Please sign in to comment.