-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_selection_function.py
194 lines (156 loc) · 6.15 KB
/
run_selection_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import os
import argparse
import numpy as np
import pandas as pd
import utils_logging as lu
import utils_plots as mplot
import matplotlib.pyplot as plt
import utils_emcee_poisson as mc
'''
2020 A. Moller
Obtain sample selection efficiency
using ratio data/sim (selected SNe Ia/simulated Ias)
'''
def copy_uncompress(fname):
import gzip
import shutil
thisfname = fname.split('/')[-1].split('.gz')[0]
with gzip.open(fname, 'rb') as f_in, open(thisfname, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
return thisfname
def load_fitres(fpath):
'''Load light curve fit file and apply cuts
Arguments:
fpath (str) -- Lightcurve fit file with path
Returns:
pandas dataframe
'''
if '.gz' in fpath:
fpath = copy_uncompress(fpath)
filetoload = pd.read_csv(fpath, index_col=False,
comment='#', delimiter=' ', skipinitialspace=True)
# apply cosmology cuts
tmp = filetoload[(filetoload['c'] > -0.3) & (filetoload['c'] < 0.3) & (filetoload['x1'] > -3) & (filetoload['x1']
< 3) & (filetoload['zHD'] > 0.05) & (filetoload['zHD'] < 0.9) & (filetoload['FITPROB'] > 1E-05)]
df = tmp.copy()
dfout = df
return dfout
def data_sim_ratio(data, sim, var='HOST_MAG_i', min_var=15, path_plots='./'):
"""
Ratio between data and simulation in a given variable
"""
# Init
# TODO: no hardcut for lower limit
data_var = data[data[var] > min_var][var]
sim_var = sim[sim[var] > min_var][var]
minv = min([x.quantile(0.01) for x in [data_var, sim_var]])
maxv = max([x.quantile(0.99) for x in [data_var, sim_var]])
bins = np.linspace(minv, maxv, 15)
bin_centers = 0.5 * (bins[1:] + bins[:-1])
hist_data, _ = np.histogram(data_var, bins=bins)
hist_sim, _ = np.histogram(sim_var, bins=bins)
err_data = np.sqrt(hist_data)
err_sim = np.sqrt(hist_sim)
ratio = hist_data / hist_sim
ratio = ratio / ratio.max()
err_ratio = np.sqrt((err_data / hist_data) ** 2 +
(err_sim / hist_sim) ** 2) * ratio
err_ratio = np.nan_to_num(err_ratio)
# save as dataframe
df = pd.DataFrame()
df['x'] = bin_centers
df['ratio'] = ratio
df['err_ratio'] = err_ratio
df['ndata'] = hist_data
df['nsim'] = hist_sim
df.ratio_variable = var
return df, minv, maxv
def write_seleff(A_mcmc, alpha_mcmc, beta_mcmc, nameout='SELEFF.DAT', min_var=15, max_var=26):
'''Writing selection function in SNANA friendly format
SNANA doesn't like uneven binning
take emcee output for sigmoid fit
create array with those values with uniform binning
Arguments:
emcee_fit A_mcmc, alpha_mcmc, beta_mcmc -- sigmoid fit parameters
nameout -- output name of selection function
'''
# min magnitude of the seleff
min_mag = min_var
mag = np.arange(min_mag, max_var, 0.1)
# create fit with emcee found parameters
sigmoid_arr = np.around(mc.sigmoid_func(
mag, 1., alpha_mcmc, beta_mcmc), decimals=2)
# normalizing to 1
sigmoid_arr = sigmoid_arr
# in case my sigmoid goes outside bounds
sigmoid_arr[sigmoid_arr > 1] = 1
sigmoid_arr[sigmoid_arr < 0] = 0
df = pd.DataFrame()
df["eff"] = sigmoid_arr
df["eff"] = df["eff"].astype(float).round(2)
df["mag_i"] = mag
df["i"] = df["mag_i"].astype(float).round(2)
# filling from high eff to lower the magnitudes
new_i = df.fillna(method='bfill')
df['SPECEFF'] = df['eff']
# column speceff
charar = np.chararray(len(new_i), itemsize=8)
charar[:] = 'SPECEFF:'
df["VARNAMES:"] = charar
df2 = pd.DataFrame(df, columns=['VARNAMES:', 'i', 'SPECEFF'])
# save
fout = open(nameout, "w")
fout.write("NVAR: 2 \n")
df2.to_csv(fout, sep=" ", index=False)
fout.close()
if __name__ == "__main__":
'''Parse arguments
'''
parser = argparse.ArgumentParser(
description='Selection function data vs simulations')
parser.add_argument('--data', type=str,
default='2_LCFIT/D_DATADES5YR/output/DESALL_forcePhoto_real_snana_fits/FITOPT000.FITRES',
help="Data file (ligthcurve fits: FITRES file)")
parser.add_argument('--sim', type=str,
default='2_LCFIT/D_DESSIMCC/output/PIP_COMPUTE_SPEC_EFF_DESSIMCC/FITOPT000.FITRES',
help="Simulation file (ligthcurve fits: FITRES file)")
parser.add_argument('--outpath', default='./dump/',
help='Path to save output')
parser.add_argument('--variable', default='HOST_MAG_i',
help='Variable used to compute selection function')
parser.add_argument('--extra_plots', action="store_true",
help='If c,x1 distributions are plotted')
# Init
args = parser.parse_args()
fdata = args.data
fsim = args.sim
variable = args.variable
path_selection_function = args.outpath
path_plots = f"{args.outpath}/plots"
os.makedirs(path_plots, exist_ok=True)
# Load fits files
print(f"data: {fdata}")
print(f"sim: {fsim}")
data = load_fitres(fdata)
sim = load_fitres(fsim)
lu.print_green('Finished loading data and sim fits')
# Selection config
min_var = data[variable].min()
max_var = data[variable].max()
# Data vs Sim ratio
df, minv, maxv = data_sim_ratio(data, sim, var=variable,
min_var=min_var, path_plots=path_plots)
# Emcee fit of dat/sim
theta_mcmc, min_theta_mcmc, max_theta_mcmc = mc.emcee_fitting(
df, path_plots, min_var=min_var)
# Write selection function in SNANA format
# not using contours
A_mcmc, alpha_mcmc, beta_mcmc = theta_mcmc
write_seleff(A_mcmc, alpha_mcmc, beta_mcmc, nameout=f"{path_selection_function}/SELEFF.DAT", min_var=min_var, max_var=max_var)
# Optional plots
if args.extra_plots:
# c,x1,z distributions
norm = mplot.distribution_plots(0, data, sim, path_plots)
# c,x1 as a function of z
mplot.plots_vs_z(data, sim, path_plots)
lu.print_blue('Finished extra plots')