Skip to content

Commit

Permalink
Merge branch 'development'
Browse files (browse the repository at this point in the history)
  • Loading branch information
anwarMZ committed Mar 7, 2023
2 parents a283815 + e3591a4 commit a9a2ad5
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions bin/functions.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -130,22 +130,25 @@ def parse_INFO(df): # return INFO dataframe with named columns, including EFF sp
unknown = df['unknown'].str.split(pat=':').apply(pd.Series)
unknown.columns = [x.lower for x in ["GT","DP","AD","RO","QR","AO","QA","GL"]]
df = pd.concat([df, unknown], axis=1)
# make ALT, AO into lists
for column in ["ao", "ALT"]:
# make ALT, AO, type into lists
for column in ["ao", "ALT", "type"]:
df[column] = df[column].str.split(",")
# get number of AO values given in "unknown" column
df['ao_count'] = df["ao"].str.len()

# parse EFF entry from INFO
df["eff_result"] = [select_snpeff_records(x, y) for x, y in
zip(df['eff'], df["ao_count"])]
# check

# check how many "type" entries there are
#df['eff_result_len'] = df["eff_result"].str.len()
#mismatch = df[['POS', 'eff', 'eff_result', 'eff_result_len', 'ao', 'unknown']]
#df['type_len'] = df["type"].str.len()
#print(df.query('eff_result_len != type_len'))
#mismatch = df.query(df.query('eff_result_len != type_len'))[['POS', 'eff_result', 'eff_result_len', 'ao', 'type']]
#mismatch.to_csv("mismatches.csv", sep='\t', header=True, index=True)

# unnest list columns
df = unnest_multi(df, ["eff_result", "ao", "ALT"], reset_index=True)
df = unnest_multi(df, ["eff_result", "ao", "ALT", "type"], reset_index=True)

# calculate Alternate Frequency
df['AF'] = df['ao'].astype(int) / df['dp'].astype(int)
Expand Down

0 comments on commit a9a2ad5

Please sign in to comment.