-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #272 from 4dn-dcic/td_scratch
VariantUtils and TestVariantUtils
- Loading branch information
Showing
8 changed files
with
259 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,9 +47,9 @@ | |
}, | ||
"David Michaels": { | ||
"emails": [ | ||
"[email protected]", | ||
"[email protected]", | ||
"[email protected]", | ||
"[email protected]" | ||
"[email protected]" | ||
], | ||
"names": [ | ||
"David Michaels", | ||
|
@@ -58,8 +58,8 @@ | |
}, | ||
"Douglas Rioux": { | ||
"emails": [ | ||
"[email protected]", | ||
"[email protected]" | ||
"[email protected]", | ||
"[email protected]" | ||
], | ||
"names": [ | ||
"Douglas Rioux", | ||
|
@@ -85,8 +85,8 @@ | |
}, | ||
"Kent M Pitman": { | ||
"emails": [ | ||
"[email protected]", | ||
"[email protected]" | ||
"[email protected]", | ||
"[email protected]" | ||
], | ||
"names": [ | ||
"Kent M Pitman", | ||
|
@@ -129,6 +129,16 @@ | |
"SooLee" | ||
] | ||
}, | ||
"Tom Duraisingh": { | ||
"emails": [ | ||
"[email protected]", | ||
"contributors.TomDuraisingh.emails.138792649+TomDuraisingh@users.noreply.github.com" | ||
], | ||
"names": [ | ||
"TomDuraisingh", | ||
"Tom Duraisingh" | ||
] | ||
}, | ||
"Will Ronchetti": { | ||
"emails": [ | ||
"[email protected]" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import json | ||
from dcicutils.ff_utils import get_metadata, search_metadata | ||
from dcicutils.creds_utils import CGAPKeyManager | ||
|
||
|
||
class VariantUtils:
    """Helpers for searching VariantSample items on a portal and summarizing mutations per gene.

    An instance looks up credentials for the given environment through ``CGAPKeyManager``
    and keeps them in ``self.creds``; the ``'server'`` entry is stored as ``self.base_url``.
    """

    # Search path used to read the total number of VariantSample items for a gene.
    # The gene symbol is appended directly to the end of the string.
    SEARCH_VARIANTS_BY_GENE = ('/search/?type=VariantSample&limit=1'
                               '&variant.genes.genes_most_severe_gene.display_title=')

    # Search path for proband VariantSample items whose csq_gnomadg_af_popmax lies in [0, 0.001].
    # The gene symbol is appended directly to the end of the string.
    SEARCH_RARE_VARIANTS_BY_GENE = ('/search/?samplegeno.samplegeno_role=proband&type=VariantSample'
                                    '&variant.csq_gnomadg_af_popmax.from=0&variant.csq_gnomadg_af_popmax.to=0.001'
                                    '&variant.genes.genes_most_severe_gene.display_title=')

    def __init__(self, *, env_name) -> None:
        """Load the key dictionary for ``env_name`` and remember its server URL.

        :param env_name: name of the environment whose credentials should be looked up.
        """
        self._key_manager = CGAPKeyManager()
        self.creds = self._key_manager.get_keydict_for_env(env=env_name)
        # Uncomment this if needed
        # self.health = get_health_page(key=self.creds)
        self.base_url = self.creds['server']

    def get_creds(self):
        """Return the key dictionary obtained in ``__init__``."""
        return self.creds

    def get_rare_variants_by_gene(self, *, gene, sort, addon=''):
        """Search for rare variants on a particular gene.

        :param gene: gene symbol appended to ``SEARCH_RARE_VARIANTS_BY_GENE``.
        :param sort: field name to sort on; it is sent as ``&sort=-<sort>``.
        :param addon: optional extra query-string text appended verbatim.
        :return: whatever ``search_metadata`` returns for the assembled query.
        """
        # Build the query as one f-string. A backslash continuation inside a string
        # literal would embed the next source line's indentation in the URL.
        # NOTE(review): base_url + '/' + a path that already starts with '/' yields a
        # double slash; left unchanged - confirm how search_metadata normalizes it.
        query = f'{self.base_url}/{self.SEARCH_RARE_VARIANTS_BY_GENE}{gene}&sort=-{sort}{addon}'
        return search_metadata(query, key=self.creds)

    def find_number_of_sample_ids(self, gene):
        """Return the number of distinct ``CALL_INFO`` values among the gene's rare variants.

        Variants without a ``CALL_INFO`` key contribute a single ``None`` entry.
        """
        variants = self.get_rare_variants_by_gene(gene=gene, sort='variant.ID')
        return len({variant.get('CALL_INFO') for variant in variants})

    def get_total_result_count_from_search(self, gene):
        """Return the ``'total'`` field of the variant search response for ``gene``."""
        res = get_metadata(self.SEARCH_VARIANTS_BY_GENE + gene, key=self.creds)
        return res['total']

    @staticmethod
    def sort_dict_in_descending_order(unsorted_dict):
        """Return a new dict with the same items ordered by value, largest first.

        Items with equal values keep their original relative order (``sorted`` is stable).
        """
        sorted_list = sorted(unsorted_dict.items(), key=lambda x: x[1], reverse=True)
        return dict(sorted_list)

    def create_dict_of_mutations(self, gene):
        """Count rare variants per position for ``gene``, keeping positions seen 10+ times.

        The result has the form
        ``{gene: {mutation1 pos: #variants, mutation2 pos: #variants, ...}}``
        with the inner dict ordered by descending count.

        :raises KeyError: if a returned variant lacks ``['variant']['POS']``.
        """
        position_counts = {}
        for variant in self.get_rare_variants_by_gene(gene=gene, sort='variant.ID'):
            pos = variant['variant']['POS']
            position_counts[pos] = position_counts.get(pos, 0) + 1
        frequent = {pos: count for pos, count in position_counts.items() if count >= 10}
        return {gene: self.sort_dict_in_descending_order(frequent)}

    @staticmethod
    def return_json(file_name):
        """Parse the JSON file at ``file_name`` and return the decoded object."""
        with open(file_name, 'r') as f:
            # json.load reads from a file object; json.loads only accepts str/bytes.
            file_content = json.load(f)
        return file_content

    @staticmethod
    def create_dict_from_json_file(file_name):
        """Create a Python object (typically a dict) from the specified JSON file."""
        with open(file_name) as f:
            return json.load(f)

    def create_list_of_msa_genes(self):
        """Return gene symbols whose summary mentions the brain or nervous system.

        Reads ``gene.json`` (resolved relative to the current working directory) and keeps
        entries whose ``gene_summary`` contains ``'nerv'`` or ``'neur'``. Entries without a
        summary are skipped.
        """
        genes = self.return_json('gene.json')
        return [gene['gene_symbol'] for gene in genes
                if 'nerv' in gene.get('gene_summary', '')
                or 'neur' in gene.get('gene_summary', '')]

    def create_url(self, gene):
        """Return a search path for the variants at the first listed position of ``gene``.

        The position is the first key under ``gene`` in
        ``10+sorted_msa_genes_and_mutations.json``; presumably that file is already sorted
        by descending count, so this is the most commonly mutated position - verify against
        whatever produces the file. The returned string does not include the server.

        :raises KeyError: if ``gene`` is not present in the file.
        :raises IndexError: if ``gene`` has no positions listed.
        """
        d = self.create_dict_from_json_file('10+sorted_msa_genes_and_mutations.json')
        pos = list(d[gene].keys())[0]
        return self.SEARCH_RARE_VARIANTS_BY_GENE + gene + f'&variant.POS.from={pos}&variant.POS.to={pos}&sort=-DP'

    def create_list_of_als_park_genes(self):
        """Return gene symbols whose summary mentions Parkinson's or ALS.

        Reads ``gene.json`` (resolved relative to the current working directory) and keeps
        entries whose ``gene_summary`` contains ``'Parkinson'`` or ``'ALS'``. Entries
        without a summary are skipped.
        """
        genes = self.return_json('gene.json')
        return [gene['gene_symbol'] for gene in genes
                if 'Parkinson' in gene.get('gene_summary', '')
                or 'ALS' in gene.get('gene_summary', '')]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "dcicutils" | ||
version = "7.7.2" | ||
version = "7.8.0" | ||
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" | ||
authors = ["4DN-DCIC Team <[email protected]>"] | ||
license = "MIT" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
import pytest | ||
from unittest import mock | ||
from contextlib import contextmanager | ||
from dcicutils import variant_utils | ||
from dcicutils.variant_utils import VariantUtils | ||
from unittest.mock import patch | ||
|
||
|
||
def create_dummy_keydict():
    """Return a fresh fake key dictionary holding one environment, ``'cgap-dummy'``.

    A new dict is built on every call, so callers may mutate the result freely.
    """
    return {
        'cgap-dummy': {
            'key': 'dummy',
            'secret': 'dummy',
            'server': 'cgap-test.com',
        },
    }
|
||
|
||
class TestVariantUtils:
    """Unit tests for ``VariantUtils`` that never touch real credentials or the network."""

    class CGAPKeyManager:
        """Stand-in for the real key manager; always hands back the dummy key dictionary."""

        def get_keydict_for_env(self, *, env):
            # The requested env is ignored: every lookup resolves to the dummy entry.
            return create_dummy_keydict()['cgap-dummy']

    @contextmanager
    def mock_key_manager(self):
        """Replace ``variant_utils.CGAPKeyManager`` with the stub for the duration of the block."""
        with mock.patch.object(variant_utils, 'CGAPKeyManager', new=self.CGAPKeyManager):
            yield

    def test_variant_utils_basic(self):
        """Tests the instantiation of a VariantUtils object."""
        with self.mock_key_manager():
            vu = VariantUtils(env_name='cgap-dummy')
            assert isinstance(vu, VariantUtils)

    @pytest.mark.parametrize('total_value', [
        100,
        200,
        300,
        400
    ])
    @patch('dcicutils.variant_utils.get_metadata')
    def test_get_total_result_count_from_search(self, mock_get_metadata, total_value):
        """The 'total' field of the search response is returned unchanged."""
        with self.mock_key_manager():
            vu = VariantUtils(env_name='cgap-dummy')
            mock_gene = 'GENE'
            mock_get_metadata.return_value = {'total': total_value}
            result = vu.get_total_result_count_from_search(mock_gene)
            expected_result = total_value
            assert result == expected_result
            mock_get_metadata.assert_called_once_with('/search/?type=VariantSample&limit=1'
                                                      '&variant.genes.genes_most_severe_gene.display_title='
                                                      f'{mock_gene}', key=vu.creds)

    @pytest.mark.parametrize('occurrences', [8, 9, 10, 11])
    @patch('dcicutils.variant_utils.VariantUtils.get_rare_variants_by_gene')
    def test_create_dict_of_mutations(self, mock_get_rare_variants_by_gene, occurrences):
        """Positions are reported only once they occur at least 10 times."""
        with self.mock_key_manager():
            vu = VariantUtils(env_name='cgap-dummy')
            mock_gene = 'GENE'
            # The same variant repeated `occurrences` times straddles the 10+ threshold.
            mock_get_rare_variants_by_gene.return_value = [{'variant': {'POS': 100000}}] * occurrences
            result = vu.create_dict_of_mutations(mock_gene)
            if occurrences >= 10:
                expected_result = {mock_gene: {100000: occurrences}}
            else:
                expected_result = {mock_gene: {}}
            assert result == expected_result
            mock_get_rare_variants_by_gene.assert_called_once_with(gene=mock_gene, sort='variant.ID')

    @patch('dcicutils.variant_utils.VariantUtils.return_json')
    def test_create_list_of_msa_genes(self, mock_return_json):
        """Only genes whose summary contains 'nerv' or 'neur' are listed."""
        with self.mock_key_manager():
            vu = VariantUtils(env_name='cgap-dummy')
            mock_return_json.return_value = [
                {'gene_symbol': 'GENE1', 'gene_summary': '...nerv...'},
                {'gene_symbol': 'GENE2', 'gene_summary': '..........'},
                {'gene_symbol': 'GENE3', 'gene_summary': '...neur...'}
            ]
            result = vu.create_list_of_msa_genes()
            expected_result = ['GENE1', 'GENE3']
            assert result == expected_result
            mock_return_json.assert_called_once_with('gene.json')

    @patch('dcicutils.variant_utils.VariantUtils.get_rare_variants_by_gene')
    def test_find_number_of_sample_ids(self, mock_get_rare_variants_by_gene):
        """Duplicate CALL_INFO values are counted once."""
        with self.mock_key_manager():
            # Use the same env name as the dummy key dictionary and the other tests.
            vu = VariantUtils(env_name='cgap-dummy')
            mock_gene = 'GENE'
            mock_get_rare_variants_by_gene.return_value = [
                {'CALL_INFO': 'ABC123'},
                {'CALL_INFO': 'ABC123'},
                {'CALL_INFO': 'BCD234'},
                {'CALL_INFO': 'CDE345'}
            ]
            result = vu.find_number_of_sample_ids(mock_gene)
            expected_result = 3
            assert result == expected_result
            mock_get_rare_variants_by_gene.assert_called_once_with(gene=mock_gene, sort='variant.ID')

    @pytest.mark.parametrize('pos', [
        '100000',
        '200000',
        '300000',
        '400000'
    ])
    @patch('dcicutils.variant_utils.VariantUtils.create_dict_from_json_file')
    def test_create_url(self, mock_create_dict_from_json_file, pos):
        """The URL targets the first position listed for the requested gene."""
        with self.mock_key_manager():
            # Use the same env name as the dummy key dictionary and the other tests.
            vu = VariantUtils(env_name='cgap-dummy')
            mock_gene = 'GENE'
            mock_create_dict_from_json_file.return_value = {
                'GENE': {pos: 20, '123456': 10},
                'OTHER_GENE': {pos: 10}
            }
            result = vu.create_url(gene=mock_gene)
            expected_result = vu.SEARCH_RARE_VARIANTS_BY_GENE + mock_gene + (f'&variant.POS.from={pos}'
                                                                             f'&variant.POS.to={pos}&sort=-DP')
            assert result == expected_result
            mock_create_dict_from_json_file.assert_called_once_with('10+sorted_msa_genes_and_mutations.json')

    @patch('dcicutils.variant_utils.VariantUtils.return_json')
    def test_create_list_of_als_park_genes(self, mock_return_json):
        """Only genes whose summary contains 'Parkinson' or 'ALS' are listed."""
        with self.mock_key_manager():
            vu = VariantUtils(env_name='cgap-dummy')
            mock_return_json.return_value = [
                {'gene_symbol': 'GENE1', 'gene_summary': '...Parkinson...'},
                {'gene_symbol': 'GENE2', 'gene_summary': '...............'},
                {'gene_symbol': 'GENE3', 'gene_summary': '.....ALS.......'}
            ]
            result = vu.create_list_of_als_park_genes()
            expected_result = ['GENE1', 'GENE3']
            assert result == expected_result
            mock_return_json.assert_called_once_with('gene.json')