You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# Load the microbiome measures generated in notebook 1.
df_unique = pd.read_csv('df_uniqueness_new.csv')

# Report the table dimensions (rows x columns) to check the number of samples.
print(df_unique.shape)

# Index the rows by the values of the 'Unnamed: 0' column. Passing the Series
# (rather than the column label) leaves that column in the frame as well.
df_unique.set_index(df_unique['Unnamed: 0'], inplace=True)

# Median number of medications among participants aged 85 or older in the
# discovery cohort (firstcohort == 1). This is the threshold behind the
# reported stratification based on medication use.
discovery_df = df_unique.loc[df_unique['firstcohort'] == 1]
aged_85_plus = discovery_df['age'] >= 85
med_median = discovery_df.loc[aged_85_plus, 'm1medsin'].median()
print('median no. of meds in 85+ year olds from the discovery cohort=', med_median)

# 66th-percentile cut-offs, taken over all non-missing values of each measure.
walk_speed_cutoff = np.percentile(df_unique['nfwlkspd'].dropna(), 66)
print(walk_speed_cutoff)
lsc_cutoff = np.percentile(df_unique['lsc'].dropna(), 66)
print(lsc_cutoff)
# Health stratifications: one 0/1 indicator list per health measure, with one
# entry per row of df_unique (1 = participant falls in the healthier group).

# Self-rated health: 1 only when the answer is exactly 'excellent'.
score = [1 if answer == 'excellent' else 0 for answer in df_unique['qlhealth']]
print(np.sum(score))

# Medication use: 1 when the medication count is at or below med_median.
med = [1 if n_meds <= med_median else 0 for n_meds in df_unique['m1medsin']]
print(np.sum(med))

# Walking speed: 1 when at or above the cut-off. Walking speed has 7 missing
# values (participant could not come to the visit or could not perform the
# test). A missing value fails the >= comparison, so those participants are
# all classified into the less healthy (low) group.
wlk = [1 if speed >= walk_speed_cutoff else 0 for speed in df_unique['nfwlkspd']]

# lsc: 1 when at or above the cut-off; missing values again fall to 0.
lsc = [1 if value >= lsc_cutoff else 0 for value in df_unique['lsc']]
# Attach the four 0/1 health indicators to the table. The assignment order
# fixes the order of the new columns in the saved CSV.
df_unique['percieved_health'] = score
df_unique['med'] = med
df_unique['wlk'] = wlk
df_unique['lsc_quant'] = lsc

# Composite healthy score: number of indicators (0-4) on which each
# participant was in the healthy group.
indicator_columns = ['percieved_health', 'wlk', 'lsc_quant', 'med']
df_unique['total_health'] = df_unique[indicator_columns].sum(axis=1)

# Stratification variable: composite healthy (1) when at least three of the
# four indicators are met, otherwise not composite healthy (0).
comp = [1 if total >= 3 else 0 for total in df_unique['total_health']]
df_unique['comp_healthy'] = comp
df_unique['comp_healthy'].sum()

# Save the dataframe with health stratifications for the demographics table.
df_unique.to_csv('demographics.csv')
df_unique.shape
# Spearman correlation between Bray-Curtis uniqueness and age, as reported in
# the text, restricted to the discovery cohort (firstcohort == 1).
in_discovery = df_unique['firstcohort'] == 1
discovery = df_unique.loc[in_discovery]
discovery_age = discovery['age']

# Correlation for the 'sqrt_min_bray_g' measure is printed ...
bray_g_vs_age = scipy.stats.spearmanr(discovery['sqrt_min_bray_g'], discovery_age)
print(bray_g_vs_age)
# ... while the one for 'sqrt_min_bray' is left as the cell's display value.
scipy.stats.spearmanr(discovery['sqrt_min_bray'], discovery_age)
# Error-bar length for plotting coefficients: distance from each coefficient
# down to its lower bound.
results['err'] = results['beta_coef'] - results['lower']

# Results and sample sizes reported in Figure 4E: index the table by metric
# and keep only the Bray-Curtis rows.
results.index = results['Metric']
r_bray = results[results.index == 'Bray-Curtis']
r_bray

# Figure 4E: one horizontal stem plus marker per selected row, showing the
# Spearman coefficient of that group.
sns.set(font_scale=0.5, context='poster', font='Arial', style='white')

# One y position per row, in table order.
my_range = range(len(r_bray.index))
# Colour by the healthy flag: 1 -> dark blue, anything else -> dark red.
my_color = np.where(r_bray['healthy(yes1/no0)'] == 1, 'darkblue', 'darkred')
# Marker area: 100 for flags below 2, otherwise 30.
my_size = np.where(r_bray['healthy(yes1/no0)'] < 2, 100, 30)

# A single figure is opened here. The previous extra 18x18 in figure was never
# drawn on or closed, so it only produced an empty output and held memory.
plt.figure(figsize=[5, 10], dpi=200)
plt.rcParams.update({'font.size': 24})
plt.rcParams['axes.facecolor'] = 'white'
plt.hlines(y=my_range, xmin=0, xmax=r_bray['spearmanr'], color=my_color, alpha=0.5)
plt.scatter(r_bray['spearmanr'], my_range, color=my_color, s=my_size, alpha=1)

# Axis limits, tick labels (the healthy flag of each row) and axis names.
plt.xlim(-0.15, 0.35)
plt.yticks(my_range, r_bray['healthy(yes1/no0)'])
plt.xlabel('Spearmanr')
plt.ylabel('Group')
# Results and sample sizes reported in Figure S1a: index the table by metric
# and keep only the Weighted UniFrac rows.
results.index = results['Metric']
Weighted_Unifrac = results[results.index == 'Weighted_Unifrac']
Weighted_Unifrac

# Plot of the rows selected above: one horizontal stem plus marker per row,
# showing the Spearman coefficient of that group.
sns.set(font_scale=1.0, context='poster', font='Arial', style='white')

# One y position per row, in table order.
my_range = range(len(Weighted_Unifrac.index))
# Colour by the healthy flag: 1 -> dark blue, anything else -> dark red.
my_color = np.where(Weighted_Unifrac['healthy(yes1/no0)'] == 1, 'darkblue', 'darkred')
# Marker area: 100 for flags below 2, otherwise 30.
my_size = np.where(Weighted_Unifrac['healthy(yes1/no0)'] < 2, 100, 30)

# A single figure is opened here. The previous extra 18x18 in figure was never
# drawn on or closed, so it only produced an empty output and held memory.
plt.figure(figsize=[5, 10], dpi=200)
plt.rcParams.update({'font.size': 24})
plt.rcParams['axes.facecolor'] = 'white'
plt.hlines(y=my_range, xmin=0, xmax=Weighted_Unifrac['spearmanr'], color=my_color, alpha=0.5)
plt.scatter(Weighted_Unifrac['spearmanr'], my_range, color=my_color, s=my_size, alpha=1)

# Axis limits, tick labels (the healthy flag of each row) and axis names.
plt.xlim(-0.2, 0.40)
plt.yticks(my_range, Weighted_Unifrac['healthy(yes1/no0)'])
plt.xlabel('spearmanr')
plt.ylabel('Group')

# Persist the full results table (all metrics), now indexed by 'Metric'.
results.to_csv('spearman_healthy_aging_corr.csv')
# Same analysis for alpha diversity: Spearman correlation between age and
# alpha diversity (Shannon, Observed) for MrOS participants, computed
# separately for the healthy group (stratification flag == 1) and for everyone
# else (flag != 1), for every health stratification within each cohort.
cohorts = [0, 1]
stratifications = ['med', 'wlk', 'lsc_quant', 'percieved_health', 'comp_healthy']
alpha_metrics = ['Shannon', 'Observed']

# Parallel lists, one entry per (cohort, stratification, metric, group) row.
health = []
Metric = []
Cohort = []
Coefficient = []
pvalue = []
sample_size = []
condition = []

for cohort in cohorts:
    # The cohort subset does not depend on the stratification; build it once.
    cohort_df = df_unique[df_unique['firstcohort'] == cohort]
    for stratification in stratifications:
        # Healthy group first, then the rest, so rows come in (1, 0) order.
        groups = [
            (1, cohort_df[cohort_df[stratification] == 1]),
            (0, cohort_df[cohort_df[stratification] != 1]),
        ]
        for metric in alpha_metrics:
            for flag, group in groups:
                # One call yields both the coefficient and the p-value.
                coef, p = scipy.stats.spearmanr(group['age'], group[metric])
                Metric.append(metric)
                health.append(stratification)
                Cohort.append(cohort)
                Coefficient.append(coef)
                pvalue.append(p)
                sample_size.append(len(group))
                condition.append(flag)

# Assemble the results table; key order fixes the column order.
results_alpha = pd.DataFrame({
    'Metric': Metric,
    'Health': health,
    'cohort': Cohort,
    'spearmanr': Coefficient,
    'pvalue': pvalue,
    'sample_size': sample_size,
    'healthy(yes1/no0)': condition,
})
results_alpha = results_alpha.sort_values(
    by=['cohort', 'Health', 'healthy(yes1/no0)'], ascending=True
)
# Results and sample sizes for the Shannon rows: index the alpha-diversity
# table by metric and keep only the rows labelled 'Shannon'.
results_alpha.index = results_alpha['Metric']
Shannon = results_alpha[results_alpha.index == 'Shannon']
Shannon

# Plot of the rows selected above: one horizontal stem plus marker per row,
# showing the Spearman coefficient of that group.
sns.set(font_scale=1.0, context='poster', font='Arial', style='white')

# One y position per row, in table order.
my_range = range(len(Shannon.index))
# Colour by the healthy flag: 1 -> dark green, anything else -> grey.
my_color = np.where(Shannon['healthy(yes1/no0)'] == 1, 'darkgreen', 'grey')
# Marker area: 100 for flags below 2, otherwise 30.
my_size = np.where(Shannon['healthy(yes1/no0)'] < 2, 100, 30)

# A single figure is opened here. The previous extra 18x18 in figure was never
# drawn on or closed, so it only produced an empty output and held memory.
plt.figure(figsize=[5, 10], dpi=200)
plt.rcParams.update({'font.size': 24})
plt.rcParams['axes.facecolor'] = 'white'
plt.hlines(y=my_range, xmin=0, xmax=Shannon['spearmanr'], color=my_color, alpha=0.5)
plt.scatter(Shannon['spearmanr'], my_range, color=my_color, s=my_size, alpha=1)

# Axis limits, tick labels (the healthy flag of each row) and axis names.
plt.xlim(-0.3, 0.50)
plt.yticks(my_range, Shannon['healthy(yes1/no0)'])
plt.xlabel('Spearmanr')
plt.ylabel('Group')
# Results and sample sizes for the Observed rows of the alpha-diversity table
# (the table's index must already hold the metric labels).
Observed = results_alpha[results_alpha.index == 'Observed']
Observed

# Plot of the rows selected above: one horizontal stem plus marker per row,
# showing the Spearman coefficient of that group.
sns.set(font_scale=1.0, context='poster', font='Arial', style='white')

# One y position per plotted row. This is sized from Observed itself; it was
# previously sized from r_bray, which only works when both tables happen to
# have the same number of rows.
my_range = range(len(Observed.index))
# Colour by the healthy flag: 1 -> gold, anything else -> grey.
my_color = np.where(Observed['healthy(yes1/no0)'] == 1, 'gold', 'grey')
# Marker area: 100 for flags below 2, otherwise 30.
my_size = np.where(Observed['healthy(yes1/no0)'] < 2, 100, 30)

# A single figure is opened here. The previous extra 18x18 in figure was never
# drawn on or closed, so it only produced an empty output and held memory.
plt.figure(figsize=[5, 10], dpi=200)
plt.rcParams.update({'font.size': 24})
plt.rcParams['axes.facecolor'] = 'white'
plt.hlines(y=my_range, xmin=0, xmax=Observed['spearmanr'], color=my_color, alpha=0.5)
plt.scatter(Observed['spearmanr'], my_range, color=my_color, s=my_size, alpha=1)

# Axis limits, tick labels (the healthy flag of each row) and axis names.
plt.xlim(-0.2, 0.40)
plt.yticks(my_range, Observed['healthy(yes1/no0)'])
# The plotted column is 'spearmanr', so the axis is labelled accordingly
# (it previously read 'Pearsonr').
plt.xlabel('Spearmanr')
plt.ylabel('Group')
# Same alpha-diversity analysis, this time reading the 'Shannon_genus' and
# 'Observed_genus' columns: Spearman correlation between age and alpha
# diversity for MrOS participants, computed separately for the healthy group
# (stratification flag == 1) and for everyone else (flag != 1), for every
# health stratification within each cohort. Rows are still labelled 'Shannon'
# and 'Observed', and results_alpha is overwritten.
cohorts = [0, 1]
stratifications = ['med', 'wlk', 'lsc_quant', 'percieved_health', 'comp_healthy']
# (label stored in the 'Metric' column, df_unique column holding the values)
alpha_metrics = [('Shannon', 'Shannon_genus'), ('Observed', 'Observed_genus')]

# Parallel lists, one entry per (cohort, stratification, metric, group) row.
health = []
Metric = []
Cohort = []
Coefficient = []
pvalue = []
sample_size = []
condition = []

for cohort in cohorts:
    # The cohort subset does not depend on the stratification; build it once.
    cohort_df = df_unique[df_unique['firstcohort'] == cohort]
    for stratification in stratifications:
        # Healthy group first, then the rest, so rows come in (1, 0) order.
        groups = [
            (1, cohort_df[cohort_df[stratification] == 1]),
            (0, cohort_df[cohort_df[stratification] != 1]),
        ]
        for label, column in alpha_metrics:
            for flag, group in groups:
                # One call yields both the coefficient and the p-value.
                coef, p = scipy.stats.spearmanr(group['age'], group[column])
                Metric.append(label)
                health.append(stratification)
                Cohort.append(cohort)
                Coefficient.append(coef)
                pvalue.append(p)
                sample_size.append(len(group))
                condition.append(flag)

# Assemble the results table; key order fixes the column order.
results_alpha = pd.DataFrame({
    'Metric': Metric,
    'Health': health,
    'cohort': Cohort,
    'spearmanr': Coefficient,
    'pvalue': pvalue,
    'sample_size': sample_size,
    'healthy(yes1/no0)': condition,
})
results_alpha = results_alpha.sort_values(
    by=['cohort', 'Health', 'healthy(yes1/no0)'], ascending=True
)
# Results and sample sizes for the Shannon rows: index the current
# alpha-diversity table by metric and keep only the rows labelled 'Shannon'.
results_alpha.index = results_alpha['Metric']
Shannon = results_alpha[results_alpha.index == 'Shannon']
Shannon

# Plot of the rows selected above: one horizontal stem plus marker per row,
# showing the Spearman coefficient of that group.
sns.set(font_scale=1.0, context='poster', font='Arial', style='white')

# One y position per row, in table order.
my_range = range(len(Shannon.index))
# Colour by the healthy flag: 1 -> dark green, anything else -> grey.
my_color = np.where(Shannon['healthy(yes1/no0)'] == 1, 'darkgreen', 'grey')
# Marker area: 100 for flags below 2, otherwise 30.
my_size = np.where(Shannon['healthy(yes1/no0)'] < 2, 100, 30)

# A single figure is opened here. The previous extra 18x18 in figure was never
# drawn on or closed, so it only produced an empty output and held memory.
plt.figure(figsize=[5, 10], dpi=200)
plt.rcParams.update({'font.size': 24})
plt.rcParams['axes.facecolor'] = 'white'
plt.hlines(y=my_range, xmin=0, xmax=Shannon['spearmanr'], color=my_color, alpha=0.5)
plt.scatter(Shannon['spearmanr'], my_range, color=my_color, s=my_size, alpha=1)

# Axis limits, tick labels (the healthy flag of each row) and axis names.
plt.xlim(-0.3, 0.50)
plt.yticks(my_range, Shannon['healthy(yes1/no0)'])
plt.xlabel('Spearmanr')
plt.ylabel('Group')
# Results and sample sizes for the Observed rows of the current
# alpha-diversity table (its index must already hold the metric labels).
Observed = results_alpha[results_alpha.index == 'Observed']
Observed

# Plot of the rows selected above: one horizontal stem plus marker per row,
# showing the Spearman coefficient of that group.
sns.set(font_scale=1.0, context='poster', font='Arial', style='white')

# One y position per plotted row. This is sized from Observed itself; it was
# previously sized from r_bray, which only works when both tables happen to
# have the same number of rows.
my_range = range(len(Observed.index))
# Colour by the healthy flag: 1 -> gold, anything else -> grey.
my_color = np.where(Observed['healthy(yes1/no0)'] == 1, 'gold', 'grey')
# Marker area: 100 for flags below 2, otherwise 30.
my_size = np.where(Observed['healthy(yes1/no0)'] < 2, 100, 30)

# A single figure is opened here. The previous extra 18x18 in figure was never
# drawn on or closed, so it only produced an empty output and held memory.
plt.figure(figsize=[5, 10], dpi=200)
plt.rcParams.update({'font.size': 24})
plt.rcParams['axes.facecolor'] = 'white'
plt.hlines(y=my_range, xmin=0, xmax=Observed['spearmanr'], color=my_color, alpha=0.5)
plt.scatter(Observed['spearmanr'], my_range, color=my_color, s=my_size, alpha=1)

# Axis limits, tick labels (the healthy flag of each row) and axis names.
plt.xlim(-0.3, 0.50)
plt.yticks(my_range, Observed['healthy(yes1/no0)'])
# The plotted column is 'spearmanr', so the axis is labelled accordingly
# (it previously read 'Pearsonr').
plt.xlabel('Spearmanr')
plt.ylabel('Group')