Skip to content

Commit

Permalink
VariantUtils edited
Browse files Browse the repository at this point in the history
  • Loading branch information
TomDuraisingh committed Aug 14, 2023
1 parent c3eb6ef commit 7a4756b
Showing 1 changed file with 17 additions and 13 deletions.
30 changes: 17 additions & 13 deletions dcicutils/variant_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@

class VariantUtils:

# Search path returning at most one VariantSample (limit=1) for a gene;
# callers append the gene symbol to the end of the string.
SEARCH_VARIANTS_BY_GENE = ('/search/?type=VariantSample&limit=1'
                           '&variant.genes.genes_most_severe_gene.display_title=')
# Search path for proband VariantSamples whose variant.csq_gnomadg_af_popmax
# lies between 0 and 0.001; callers append the gene symbol to the end.
SEARCH_RARE_VARIANTS_BY_GENE = ('/search/?samplegeno.samplegeno_role=proband&type=VariantSample'
                                '&variant.csq_gnomadg_af_popmax.from=0&variant.csq_gnomadg_af_popmax.to=0.001'
                                '&variant.genes.genes_most_severe_gene.display_title=')

def __init__(self, *, env_name) -> None:
self._key_manager = CGAPKeyManager()
Expand All @@ -21,28 +22,29 @@ def get_creds(self):
return self.creds

def get_rare_variants_by_gene(self, *, gene, sort, addon=''):
    """Searches for rare variants on a particular gene.

    :param gene: gene symbol appended to SEARCH_RARE_VARIANTS_BY_GENE.
    :param sort: field name to sort on; it is sent as ``sort=-<field>``.
    :param addon: extra query-string text appended verbatim (default: nothing).
    :return: the result of search_metadata for the assembled URL.
    """
    # Adjacent literals inside the parentheses are joined at compile time, so
    # the URL cannot pick up source indentation the way a backslash
    # continuation placed inside a string literal does.
    # NOTE(review): SEARCH_RARE_VARIANTS_BY_GENE already starts with '/', so
    # this yields '<base_url>//search/...'; kept as-is, confirm it is intended.
    url = (f'{self.base_url}/{self.SEARCH_RARE_VARIANTS_BY_GENE}{gene}'
           f'&sort=-{sort}{addon}')
    return search_metadata(url, key=self.creds)

def find_number_of_sample_ids(self, gene):
    """Returns the number of distinct 'CALL_INFO' values among the rare variants of the specified gene.

    'CALL_INFO' presumably identifies the sample, making this the number of
    samples with a mutation on the gene -- confirm against the VariantSample schema.

    :param gene: gene symbol passed on to get_rare_variants_by_gene.
    :return: count of distinct 'CALL_INFO' values.
    """
    rare_variants = self.get_rare_variants_by_gene(gene=gene, sort='variant.ID')
    # A variant lacking 'CALL_INFO' contributes None, which counts as one distinct value.
    return len({variant.get('CALL_INFO') for variant in rare_variants})

def get_total_result_count_from_search(self, gene):
    """Returns total number of variants associated with specified gene.

    The value is read from the 'total' field of the response that
    get_metadata returns for SEARCH_VARIANTS_BY_GENE + gene.

    :param gene: gene symbol (a string; it is concatenated onto the search path).
    :return: the response's 'total' value.
    """
    response = get_metadata(self.SEARCH_VARIANTS_BY_GENE + gene, key=self.creds)
    return response['total']

@staticmethod
def sort_dict_in_descending_order(unsorted_dict):
"""sorts dictionary in descending value order"""
"""Sorts dictionary in descending value order"""
sorted_list = sorted(unsorted_dict.items(), key=lambda x: x[1], reverse=True)
return dict(sorted_list)

def create_dict_of_mutations(self, gene):
"""Creates dictionary of specified gene and 10+ occurring positions with their number of variants"""
"""Creates dictionary of specified gene and mutations that occur 10+ times in database, in the form:
{gene: {mutation1 pos: #variants, mutation2 pos: #variants, ...}}"""
mutation_dict = {}
unique_positions = set()
for variant in self.get_rare_variants_by_gene(gene=gene, sort='variant.ID'):
Expand All @@ -62,26 +64,28 @@ def return_json(file_name):

@staticmethod
def create_dict_from_json_file(file_name):
"""creates dictionary object from json file"""
"""Creates dictionary object from specified json file"""
with open(file_name) as f:
json_list = f.read()
return json.loads(json_list)

def create_list_of_msa_genes(self):
    """Creates list of genes relating to the brain or nervous system.

    A gene is included when its 'gene_summary' contains the substring 'nerv'
    or 'neur' (case-sensitive); genes without a summary are skipped.

    :return: list of 'gene_symbol' values, in the order they appear in 'gene.json'.
    """
    keywords = ('nerv', 'neur')
    genes = self.return_json('gene.json')
    return [gene['gene_symbol'] for gene in genes
            if any(keyword in gene.get('gene_summary', '') for keyword in keywords)]

def create_url(self, gene):
    """Returns the search path for the variants at the first listed position of specified gene.

    Positions are read from '10+sorted_msa_genes_and_mutations.json' and the
    first key stored under ``gene`` is used. Presumably that is the most
    commonly mutated position because the file is sorted by descending count --
    confirm against how the file is generated.

    :param gene: gene symbol; must be a key of the JSON file.
    :return: SEARCH_RARE_VARIANTS_BY_GENE + gene, restricted to that position and sorted by -DP.
    :raises KeyError: if the gene is not in the file.
    :raises IndexError: if the gene has no positions.
    """
    mutations_by_gene = self.create_dict_from_json_file('10+sorted_msa_genes_and_mutations.json')
    # Iterating a dict yields its keys in insertion order, so no .keys() call is needed.
    pos = list(mutations_by_gene[gene])[0]
    return (f'{self.SEARCH_RARE_VARIANTS_BY_GENE}{gene}'
            f'&variant.POS.from={pos}&variant.POS.to={pos}&sort=-DP')

def create_list_of_als_park_genes(self):
"""Creates list of genes that mention Parkinson's or ALS in their summary"""
"""Creates list of genes relating to Parkinson's or ALS
(determined by whether keywords 'Parkinson' or 'ALS' in summary)"""
genes = self.return_json('gene.json')
return [gene['gene_symbol'] for gene in genes
if 'Parkinson' in gene.get('gene_summary', '')
Expand Down

0 comments on commit 7a4756b

Please sign in to comment.