#!/usr/bin/env python3
'''
This is the first script in the "lightcone halo" pipeline. The goal of this script is to use merger
tree information to flag halos that intersect the lightcone and to make a unique determination of
the halo catalog epoch from which to draw each halo.

Prerequisites:
`all_headers/` directory containing copies of the headers of all light cone particle files (~1100 per
simulation). If missing, it can be generated with `tools/script_headers.sh`.

Usage
-----
$ ./build_mt.py --help
'''
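
# Example invocation (a sketch; the paths are the NERSC defaults defined below):
#   $ ./build_mt.py --sim_name AbacusSummit_base_c000_ph006 --z_start 0.1 --z_stop 2.5 \
#       --merger_parent /global/project/projectdirs/desi/cosmosim/Abacus/merger \
#       --catalog_parent /global/project/projectdirs/desi/cosmosim/Abacus/halo_light_cones/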
import sys
import glob
import time
import gc
import os
from pathlib import Path
import asdf
import numpy as np
from scipy.interpolate import interp1d
#import matplotlib
#matplotlib.use('Agg')
import matplotlib.pyplot as plt
import argparse
from astropy.table import Table
from tools.InputFile import InputFile
from tools.merger import simple_load, get_zs_from_headers, get_halos_per_slab, get_one_header, unpack_inds, pack_inds, reorder_by_slab, mark_ineligible
from tools.aid_asdf import save_asdf
from tools.read_headers import get_lc_info
from tools.compute_dist import dist, wrapping
# these are probably just for testing; should be removed for production
DEFAULTS = {}
DEFAULTS['sim_name'] = "AbacusSummit_base_c000_ph006"
DEFAULTS['merger_parent'] = "/global/project/projectdirs/desi/cosmosim/Abacus/merger"
DEFAULTS['catalog_parent'] = "/global/project/projectdirs/desi/cosmosim/Abacus/halo_light_cones/"
#DEFAULTS['catalog_parent'] = "/global/cscratch1/sd/boryanah/new_lc_halos/"
DEFAULTS['z_start'] = 0.1
DEFAULTS['z_stop'] = 2.5
DEFAULTS['superslab_start'] = 0
CONSTANTS = {'c': 299792.458} # km/s, speed of light
def correct_inds(halo_ids, N_halos_slabs, slabs, inds_fn):
'''
    Reorder a packed halo index array into positions within the concatenated array
    of loaded superslabs, given the per-slab halo counts for its time epoch
'''
# number of halos in the loaded superslabs
N_halos_load = np.array([N_halos_slabs[i] for i in inds_fn])
# unpack slab and index for each halo
slab_ids, ids = unpack_inds(halo_ids)
# total number of halos in the slabs that we have loaded
N_halos = np.sum(N_halos_load)
offsets = np.zeros(len(inds_fn), dtype=int)
offsets[1:] = np.cumsum(N_halos_load)[:-1]
# determine if unpacking halos for only one file (Merger_this['HaloIndex']) -- no need to offset
if len(inds_fn) == 1: return ids
# select the halos belonging to given slab
for i, ind_fn in enumerate(inds_fn):
select = np.where(slab_ids == slabs[ind_fn])[0]
ids[select] += offsets[i]
return ids
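
# Worked example for correct_inds (illustrative values only; the exact bit layout of the packed
# ids lives in tools.merger): with N_halos_slabs = [10, 20, 30], slabs = [0, 1, 2] and
# inds_fn = [1, 2], we load 20 + 30 halos and offsets = [0, 20]; a halo id that unpacks to
# (slab 2, index 5) therefore maps to 5 + 20 = 25 in the concatenated array.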
def get_mt_info(fn_load, fields, minified):
'''
Load merger tree and progenitors information
'''
# loading merger tree info
mt_data = simple_load(fn_load, fields=fields)
# turn data into astropy table
Merger = mt_data['merger']
Merger.add_column(np.empty(len(Merger['HaloIndex']), dtype=np.float32), copy=False, name='ComovingDistance')
# if loading all progenitors
if "Progenitors" in fields:
num_progs = Merger["NumProgenitors"]
# get an array with the starting indices of the progenitors array
start_progs = np.zeros(len(num_progs), dtype=int)
start_progs[1:] = num_progs.cumsum()[:-1]
Merger.add_column(start_progs, name='StartProgenitors', copy=False)
return mt_data
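
# Illustration of the ragged progenitor layout built above (values are illustrative): with
# NumProgenitors = [2, 0, 3], the cumsum gives StartProgenitors = [0, 2, 2], so the progenitors
# of halo 2 occupy Progenitors[2:5] in the flattened progenitors array.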
def solve_crossing(r1, r2, pos1, pos2, chi1, chi2, Lbox, origin, chs, complete=False, extra=4.):
'''
    Solve for when the light cone crossing occurs and compute the interpolated
    position and velocity. Merger trees look for progenitors within a 4 Mpc/h radius
'''
# periodic wrapping of the positions of the particles
r1, r2, pos1, pos2 = wrapping(r1, r2, pos1, pos2, chs[1], chs[0], Lbox, origin, extra)
'''
# assert wrapping worked
print(r1.dtype, pos2.dtype)
r1_old, r2_old, pos1_old, pos2_old = r1.copy(), r2.copy(), pos1.copy(), pos2.copy()
r1_new, r2_new, pos1_new, pos2_new = wrapping(r1, r2, pos1, pos2, chs[1], chs[0], Lbox, origin, extra)
cond = ((r2_new <= chs[1]) & (r2_new > chs[0])) | ((r1_new <= chs[1]) & (r1_new > chs[0]))
print("index = ", np.arange(len(cond))[~cond])
print("after wrapping:")
print(np.array(pos1_new[~cond]))
print(np.array(pos2_new[~cond]))
print(np.array(r1_new[~cond]))
print(np.array(r2_new[~cond]))
print("hi, lo, origin = ", chs[1], chs[0], origin)
print(np.array(pos1_old[~cond]))
print(np.array(pos2_old[~cond]))
print(np.array(r1_old[~cond]))
print(np.array(r2_old[~cond]))
r1, r2, pos1, pos2 = r1_new, r2_new, pos1_new, pos2_new
'''
assert np.all(((r2 <= chs[1]) & (r2 > chs[0])) | ((r1 <= chs[1]) & (r1 > chs[0]))), "Wrapping didn't work"
    # in a very small number of cases (e.g. z = 0.8 corner halos), the current halo position and
    # the main progenitor are both within chi1 and chi2 but sit on opposite ends of the wrap.
    # In that case, we would just move things to the side of whichever is closer to the
    # interpolated position (or just pick one)
assert np.sum(np.abs(pos2 - pos1) > extra) == 0, "There are halos on opposite ends after wrapping"
# solve for chi_star, where chi(z) = eta(z=0)-eta(z)
# equation is r1+(chi1-chi)/(chi1-chi2)*(r2-r1) = chi, with solution:
chi_star = (r1 * (chi1 - chi2) + chi1 * (r2 - r1)) / ((chi1 - chi2) + (r2 - r1))
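    # derivation: setting the linear trajectory r(chi) = r1 + (chi1 - chi)/(chi1 - chi2)*(r2 - r1)
    # equal to chi and multiplying through by (chi1 - chi2) gives
    # r1*(chi1 - chi2) + chi1*(r2 - r1) = chi_star*((chi1 - chi2) + (r2 - r1)), i.e. the line above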
# get interpolated positions of the halos
v_avg = (pos2 - pos1) / (chi1 - chi2)
pos_star = pos1 + v_avg * (chi1 - chi_star[:, None])
# enforce boundary conditions by periodic wrapping
#pos_star[pos_star >= Lbox/2.] = pos_star[pos_star >= Lbox/2.] - Lbox
#pos_star[pos_star < -Lbox/2.] = pos_star[pos_star < -Lbox/2.] + Lbox
    # interpolated velocity [km/s]: v_avg = dx/dchi is dimensionless, so multiplying by c gives km/s
    vel_star = v_avg * CONSTANTS['c']  # vel1 + a_avg*(chi1 - chi_star)
    # coordinate note: x is comoving and r = a*x is physical, so dr/dt = a*dx/dt + x*da/dt;
    # vel_star = dx/deta = dr/dt - H(t)*r is the peculiar velocity, and the physical velocity
    # is recovered as dr/dt = vel_star + a*H(t)*x
    # (relevant header fields: 'Htime', 'HubbleNow', 'HubbleTimeGyr', 'HubbleTimeHGyr')
# mark True if closer to chi2 (this snapshot)
bool_star = np.abs(chi1 - chi_star) > np.abs(chi2 - chi_star)
# condition to check whether halo in this light cone band
assert np.all(((chi_star <= chs[1]+extra) & (chi_star > chs[0]-extra))), "Solution is out of bounds"
return chi_star, pos_star, vel_star, bool_star
def offset_pos(pos,ind_origin,all_origins):
'''
Offset the interpolated positions to create continuous light cones
'''
# location of initial observer
first_observer = all_origins[0]
current_observer = all_origins[ind_origin]
offset = (first_observer-current_observer)
pos += offset
return pos
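
# Illustrative example with hypothetical origins: if all_origins = [[-990., -990., -990.],
# [-990., -990., -2990.]], a halo drawn for ind_origin = 1 is shifted by (0, 0, 2000) so that
# the periodic copies tile into one continuous light cone volume.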
def main(sim_name, z_start, z_stop, merger_parent, catalog_parent, superslab_start, resume=False, plot=False, complete=False):
'''
Main function.
The algorithm: for each merger tree epoch, for
each superslab, for each light cone origin,
compute the intersection of the light cone with
each halo, using the interpolated position
to the previous merger epoch (and possibly a
velocity correction). If the intersection is
between the current and previous merger epochs,
then record the closer one as that halo's
epoch and mark its progenitors as ineligible.
Will need one padding superslab in the previous
merger epoch. Can process in a rolling fashion.
'''
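    # Rolling-window sketch of the loading scheme described above (a comment-only
    # illustration; see the mt_prev bookkeeping in the superslab loop below):
    #   prime:        mt_prev = {k0-1, k0}             (k0 = superslab_start)
    #   each step k:  load k+1, drop k-2  ->  mt_prev = {k-1, k, k+1}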
# turn directories into Paths
merger_parent = Path(merger_parent)
catalog_parent = Path(catalog_parent)
merger_dir = merger_parent / sim_name
header = get_one_header(merger_dir)
# simulation parameters
Lbox = header['BoxSize']
# location of the LC origins in Mpc/h
origins = np.array(header['LightConeOrigins']).reshape(-1,3)
# just for testing with highbase. remove!
if 'highbase' in sim_name:
origins /= 2.
# directory where we save the final outputs
cat_lc_dir = catalog_parent / "halo_light_cones" / sim_name
os.makedirs(cat_lc_dir, exist_ok=True)
# directory where we save the current state if we want to resume
os.makedirs(cat_lc_dir / "tmp", exist_ok=True)
with open(cat_lc_dir / "tmp" / "build.log", "a") as f:
f.writelines(["# Starting light cone catalog construction in simulation %s \n"%sim_name])
# all redshifts, steps and comoving distances of light cones files; high z to low z
    # TODO: remove pre-saving once testing is done (or make sure the pre-saved files can be matched with the simulation)
if not os.path.exists(Path("data_headers") / sim_name / "coord_dist.npy") or not os.path.exists(Path("data_headers") / sim_name / "redshifts.npy") or not os.path.exists(Path("data_headers") / sim_name / "eta_drift.npy"):
zs_all, steps_all, chis_all, etad_all = get_lc_info(Path("all_headers") / sim_name)
os.makedirs(Path("data_headers") / sim_name, exist_ok=True)
np.save(Path("data_headers") / sim_name / "redshifts.npy", zs_all)
np.save(Path("data_headers") / sim_name / "steps.npy", steps_all)
np.save(Path("data_headers") / sim_name / "coord_dist.npy", chis_all)
np.save(Path("data_headers") / sim_name / "eta_drift.npy", etad_all)
zs_all = np.load(Path("data_headers") / sim_name / "redshifts.npy")
chis_all = np.load(Path("data_headers") / sim_name / "coord_dist.npy")
zs_all[-1] = float("%.1f" % zs_all[-1]) # LHG: I guess this is trying to match up to some filename or something?
# get functions relating chi and z
#chi_of_z = interp1d(np.insert(zs_all, 0, 0.), np.insert(chis_all, 0, 0.))
#z_of_chi = interp1d(np.insert(chis_all, 0, 0.), np.insert(zs_all, 0, 0.))
chi_of_z = interp1d(zs_all,chis_all)
z_of_chi = interp1d(chis_all, zs_all)
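    # note: scipy's interp1d raises a ValueError for queries outside the tabulated range unless
    # bounds_error=False and fill_value are given, which is why the starting redshift is checked
    # against zs_all further down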
# if merger tree redshift information has been saved, load it (if not, save it)
if not os.path.exists(Path("data_mt") / sim_name / "zs_mt.npy"):
# all merger tree snapshots and corresponding redshifts
snaps_mt = sorted(merger_dir.glob("associations_z*.0.asdf"))
zs_mt = get_zs_from_headers(snaps_mt)
os.makedirs(Path("data_mt") / sim_name, exist_ok=True)
np.save(Path("data_mt") / sim_name / "zs_mt.npy", zs_mt)
zs_mt = np.load(Path("data_mt") / sim_name / "zs_mt.npy")
# number of superslabs
n_superslabs = len(list(merger_dir.glob("associations_z%4.3f.*.asdf"%zs_mt[0])))
print("number of superslabs = ",n_superslabs)
    # starting and finishing redshift indices
ind_start = np.argmin(np.abs(zs_mt - z_start))
ind_stop = np.argmin(np.abs(zs_mt - z_stop))
# initialize difference between the conformal time of the previous two catalogs
delta_chi_old = 0.
if resume:
        # if the user wants to resume from a previous state, create an array marking whether each
        # (superslab, origin) pair has saved state to pick up
        resume_flags = np.ones((n_superslabs, origins.shape[0]), dtype=bool)
# previous redshift, distance between shells
infile = InputFile(cat_lc_dir / "tmp" / "build.log")
z_this_tmp = infile.z_prev
delta_chi_old = infile.delta_chi
superslab = infile.super_slab
assert (np.abs(zs_mt[ind_start] - z_this_tmp) < 1.0e-6), "Your recorded state is not for the currently requested redshift, can't resume from old. Last recorded state is z = %.3f"%z_this_tmp
assert (np.abs((superslab_start-1)%n_superslabs - superslab) < 1.0e-6), "Your recorded state is not for the currently requested superslab, can't resume from old. Last recorded state is superslab = %d"%superslab
with open(cat_lc_dir / "tmp" / "build.log", "a") as f:
f.writelines(["# Resuming from redshift z = %4.3f \n"%z_this_tmp])
else:
        # delete the existing temporary files
        tmp_files = list((cat_lc_dir / "tmp").glob("*"))
        for fn in tmp_files:
            os.unlink(str(fn))
resume_flags = np.zeros((n_superslabs, origins.shape[0]), dtype=bool)
# fields to extract from the merger trees
fields_mt = ['HaloIndex', 'Position', 'MainProgenitor', 'Progenitors', 'NumProgenitors']
# lighter version
#fields_mt = ['HaloIndex', 'Position', 'MainProgenitor']
# redshift of closest point on wall between original and copied box
z1 = z_of_chi(0.5 * Lbox - origins[0][0])
# redshift of closest point where all three boxes touch
z2 = z_of_chi((0.5*Lbox-origins[0][0])*np.sqrt(2))
# furthest point where all three boxes touch
z3 = z_of_chi((0.5 * Lbox - origins[0][0]) * np.sqrt(3))
for i in range(ind_start, ind_stop + 1):
# this snapshot redshift and the previous
z_this = zs_mt[i]
z_prev = zs_mt[i + 1]
#z_pprev = zs_mt[i + 2] # not currently used
print("redshift of this and the previous snapshot = ", z_this, z_prev)
        # TODO: the names of the folders need to be standardized
zname_this = min(header['L1OutputRedshifts'], key=lambda z: abs(z - z_this))
# check that you are starting at a reasonable redshift
assert z_this >= np.min(zs_all), "You need to set starting redshift to the smallest value of the merger tree"
# coordinate distance of the light cone at this redshift and the previous
chi_this = chi_of_z(z_this)
chi_prev = chi_of_z(z_prev)
#chi_pprev = chi_of_z(z_pprev) # not currently used
delta_chi = chi_prev - chi_this
#delta_chi_new = chi_pprev - chi_prev # not currently used
print("comoving distance between this and previous snapshot = ", delta_chi)
# read merger trees file names at this and previous snapshot from minified version
fns_this = merger_dir.glob(f'associations_z{z_this:4.3f}.*.asdf.minified')
fns_prev = merger_dir.glob(f'associations_z{z_prev:4.3f}.*.asdf.minified')
fns_this = list(fns_this)
fns_prev = list(fns_prev)
minified = True
# if minified files not available, load the regular files
        if len(fns_this) == 0 or len(fns_prev) == 0:
fns_this = merger_dir.glob(f'associations_z{z_this:4.3f}.*.asdf')
fns_prev = merger_dir.glob(f'associations_z{z_prev:4.3f}.*.asdf')
fns_this = list(fns_this)
fns_prev = list(fns_prev)
minified = False
# turn file names into strings
for counter in range(len(fns_this)):
fns_this[counter] = str(fns_this[counter])
fns_prev[counter] = str(fns_prev[counter])
# number of merger tree files
print("number of files = ", len(fns_this), len(fns_prev))
assert n_superslabs == len(fns_this) and n_superslabs == len(fns_prev), "Incomplete merger tree files"
# reorder file names by super slab number
fns_this = reorder_by_slab(fns_this, minified)
fns_prev = reorder_by_slab(fns_prev, minified)
# get number of halos in each slab and number of slabs
N_halos_slabs_this, slabs_this = get_halos_per_slab(fns_this, minified)
N_halos_slabs_prev, slabs_prev = get_halos_per_slab(fns_prev, minified)
        # We're going to be loading slabs in a rolling fashion:
        # reading the "high" slab at the leading edge, discarding the trailing "low" slab
        # and moving the mid to low. But first we prime the queue with the two lowest slabs
        # (the "high" slab is read at the top of the loop)
mt_prev = {} # indexed by slab num
mt_prev[(superslab_start-1)%n_superslabs] = get_mt_info(fns_prev[(superslab_start-1)%n_superslabs], fields=fields_mt, minified=minified)
mt_prev[superslab_start] = get_mt_info(fns_prev[superslab_start], fields=fields_mt, minified=minified)
        weirdness = 0  # counter used only by the commented-out uniqueness checks below
# loop over each superslab
for k in range(superslab_start,n_superslabs):
            # neighboring superslab indices (with periodic wrapping)
klow = (k-1)%n_superslabs
khigh = (k+1)%n_superslabs
# slide down by one
if (klow-1)%n_superslabs in mt_prev:
del mt_prev[(klow-1)%n_superslabs]
mt_prev[khigh] = get_mt_info(fns_prev[khigh], fields_mt, minified)
            # superslab indices to load at this and the previous redshift
inds_fn_this = [k]
inds_fn_prev = np.array([klow,k,khigh],dtype=int)
print("superslabs loaded in this and previous redshifts = ",inds_fn_this, inds_fn_prev)
# get merger tree data for this snapshot and for the previous one
mt_data_this = get_mt_info(fns_this[k], fields_mt, minified)
# number of halos in this step and previous step; this depends on the number of files requested
N_halos_this = np.sum(N_halos_slabs_this[inds_fn_this])
N_halos_prev = np.sum(N_halos_slabs_prev[inds_fn_prev])
print("N_halos_this = ", N_halos_this)
print("N_halos_prev = ", N_halos_prev)
# organize data into astropy tables
Merger_this = mt_data_this['merger']
            cols = {col: np.empty(N_halos_prev,
                                  dtype=(Merger_this[col].dtype,
                                         Merger_this[col].shape[1] if 'Position' in col else 1))
                    for col in Merger_this.keys()}
Merger_prev = Table(cols, copy=False)
offset = 0
for key in mt_prev.keys():
size_superslab = len(mt_prev[key]['merger']['HaloIndex'])
Merger_prev[offset:offset+size_superslab] = mt_prev[key]['merger'][:]
offset += size_superslab
            # mask where no merger tree info is available (because we don't need to solve for eta star for those)
noinfo_this = Merger_this['MainProgenitor'] <= 0
info_this = Merger_this['MainProgenitor'] > 0
# print percentage where no information is available or halo not eligible
print("percentage no info = ", np.sum(noinfo_this) / len(noinfo_this) * 100.0)
# no info is denoted by 0 or -999 (or regular if ineligible), but -999 messes with unpacking, so we set it to 0
Merger_this['MainProgenitor'][noinfo_this] = 0
# rework the main progenitor and halo indices to return in proper order
Merger_this['HaloIndex'] = correct_inds(
Merger_this['HaloIndex'],
N_halos_slabs_this,
slabs_this,
inds_fn_this,
)
Merger_this['MainProgenitor'] = correct_inds(
Merger_this['MainProgenitor'],
N_halos_slabs_prev,
slabs_prev,
inds_fn_prev,
)
Merger_prev['HaloIndex'] = correct_inds(
Merger_prev['HaloIndex'],
N_halos_slabs_prev,
slabs_prev,
inds_fn_prev,
)
# loop over all origins
for o in range(len(origins)):
# location of the observer
origin = origins[o]
# comoving distance to observer
Merger_this['ComovingDistance'][:] = dist(Merger_this['Position'], origin)
Merger_prev['ComovingDistance'][:] = dist(Merger_prev['Position'], origin)
# merger tree data of main progenitor halos corresponding to the halos in current snapshot
Merger_prev_main_this = Merger_prev[Merger_this['MainProgenitor']].copy()
# if eligible, can be selected for light cone redshift catalog
if (i != ind_start) or resume_flags[k, o]:
# dealing with the fact that these files may not exist for all origins and all superslabs
if os.path.exists(cat_lc_dir / "tmp" / ("eligibility_prev_z%4.3f_lc%d.%02d.npy"%(z_this, o, k))):
eligibility_this = np.load(cat_lc_dir / "tmp" / ("eligibility_prev_z%4.3f_lc%d.%02d.npy"%(z_this, o, k)))
eligibility_extrap_this = np.load(cat_lc_dir / "tmp" / ("eligibility_extrap_prev_z%4.3f_lc%d.%02d.npy"%(z_this, o, k)))
else:
eligibility_this = np.ones(N_halos_this, dtype=bool)
eligibility_extrap_this = np.ones(N_halos_this, dtype=bool)
else:
eligibility_this = np.ones(N_halos_this, dtype=bool)
eligibility_extrap_this = np.ones(N_halos_this, dtype=bool)
# for a newly opened redshift, everyone is eligible to be part of the light cone catalog
eligibility_prev = np.ones(N_halos_prev, dtype=bool)
eligibility_extrap_prev = np.ones(N_halos_prev, dtype=bool)
                # only halos without merger tree info are allowed to use the extrap quantities
# mask for eligible halos for light cone origin with and without information
mask_noinfo_this = noinfo_this & eligibility_this & eligibility_extrap_this
mask_info_this = info_this & eligibility_this
# halos that have merger tree information
Merger_this_info = Merger_this[mask_info_this].copy()
Merger_prev_main_this_info = Merger_prev_main_this[mask_info_this]
# halos that don't have merger tree information
Merger_this_noinfo = Merger_this[mask_noinfo_this].copy()
# if interpolating to z = 0.1 (kinda ugly way to do this)
if complete and np.abs(z_this - 0.1) < 1.e-3:
print(f"extending {z_this:4.3f} all the way to z = 0")
chi_low = 0.
else:
chi_low = chi_this
# select objects that are crossing the light cones
#chs = np.array([chi_low, chi_prev], dtype=np.float32) # doesn't take past halos
chs = np.array([chi_low - delta_chi_old / 2.0, chi_prev], dtype=np.float32) # taking some additional objects from before
#chs = np.array([chi_low - delta_chi_old / 2.0, chi_prev + delta_chi_new / 2.0], dtype=np.float32) # TESTING
cond_1 = ((Merger_this_info['ComovingDistance'] > chs[0]) & (Merger_this_info['ComovingDistance'] <= chs[1]))
cond_2 = ((Merger_prev_main_this_info['ComovingDistance'] > chs[0]) & (Merger_prev_main_this_info['ComovingDistance'] <= chs[1]))
mask_lc_this_info = cond_1 | cond_2
del cond_1, cond_2
# for halos that have no merger tree information, we simply take their current position
                # original selection
cond_1 = (Merger_this_noinfo['ComovingDistance'] > chi_low - delta_chi_old / 2.0)
cond_2 = (Merger_this_noinfo['ComovingDistance'] <= chi_low + delta_chi / 2.0)
# TESTING
#cond_1 = (Merger_this_noinfo['ComovingDistance'] > chi_low)
#cond_2 = (Merger_this_noinfo['ComovingDistance'] <= chi_low + delta_chi)
mask_lc_this_noinfo = (cond_1 & cond_2)
del cond_1, cond_2
# spare the computer the effort and avert empty array errors
# TODO: perhaps revise, as sometimes we might have no halos in
# noinfo but some in info and vice versa
if np.sum(mask_lc_this_info) == 0 or np.sum(mask_lc_this_noinfo) == 0: continue
# percentage of objects that are part of this or previous snapshot
print(
"percentage of halos in light cone %d with and without progenitor info = "%o,
np.sum(mask_lc_this_info) / len(mask_lc_this_info) * 100.0,
np.sum(mask_lc_this_noinfo) / len(mask_lc_this_noinfo) * 100.0,
)
# select halos with mt info that have had a light cone crossing
Merger_this_info_lc = Merger_this_info[mask_lc_this_info]
Merger_prev_main_this_info_lc = Merger_prev_main_this_info[mask_lc_this_info]
if plot:
x_min = -Lbox/2.+k*(Lbox/n_superslabs)
x_max = x_min+(Lbox/n_superslabs)
x = Merger_this_info_lc['Position'][:,0]
choice = (x > x_min) & (x < x_max)
y = Merger_this_info_lc['Position'][choice,1]
z = Merger_this_info_lc['Position'][choice,2]
plt.figure(1)
plt.scatter(y, z, color='dodgerblue', s=0.1, label='current objects')
plt.legend()
plt.axis('equal')
plt.savefig('this_%d_%d_%d.png'%(i, k, o))
plt.close()
x = Merger_prev_main_this_info_lc['Position'][:,0]
choice = (x > x_min) & (x < x_max)
y = Merger_prev_main_this_info_lc['Position'][choice,1]
z = Merger_prev_main_this_info_lc['Position'][choice,2]
plt.figure(2)
plt.scatter(y, z, color='orangered', s=0.1, label='main progenitor')
plt.legend()
plt.axis('equal')
plt.savefig('prev_%d_%d_%d.png'%(i, k, o))
plt.close()
# select halos without mt info that have had a light cone crossing
Merger_this_noinfo_lc = Merger_this_noinfo[mask_lc_this_noinfo]
                # add properly typed columns for the new interpolated position, velocity and comoving distance
                Merger_this_info_lc.add_column(np.empty((len(Merger_this_info_lc), 3), dtype=np.float32), name='InterpolatedPosition', copy=False)
                Merger_this_info_lc.add_column(np.empty((len(Merger_this_info_lc), 3), dtype=np.float32), name='InterpolatedVelocity', copy=False)
                Merger_this_info_lc.add_column(np.empty(len(Merger_this_info_lc), dtype=np.float32), name='InterpolatedComoving', copy=False)
# get chi star where lc crosses halo trajectory; bool is False where closer to previous
(
Merger_this_info_lc['InterpolatedComoving'],
Merger_this_info_lc['InterpolatedPosition'],
Merger_this_info_lc['InterpolatedVelocity'],
bool_star_this_info_lc,
) = solve_crossing(
Merger_prev_main_this_info_lc['ComovingDistance'],
Merger_this_info_lc['ComovingDistance'],
Merger_prev_main_this_info_lc['Position'],
Merger_this_info_lc['Position'],
chi_prev,
chi_this,
Lbox,
origin,
chs,
complete=(complete and np.abs(z_this - 0.1) < 1.e-3),
)
# number of objects in this light cone
N_this_star_lc = np.sum(bool_star_this_info_lc)
N_this_noinfo_lc = np.sum(mask_lc_this_noinfo)
if i != ind_start or resume_flags[k, o]:
# check if we have information about this light cone origin, superslab and epoch
if os.path.exists(cat_lc_dir / "tmp" / ("Merger_next_z%4.3f_lc%d.%02d.asdf"%(z_this, o, k))):
# load leftover halos from previously loaded redshift
with asdf.open(cat_lc_dir / "tmp" / ("Merger_next_z%4.3f_lc%d.%02d.asdf"%(z_this, o, k)), lazy_load=True, copy_arrays=True) as f:
Merger_next = f['data']
# if you are a halo that appears here, we are gonna ignore you
Merger_next = Table(Merger_next)
N_next_lc = len(Merger_next['HaloIndex'])
                        # tmp1: to-append and extrapolated from before; tmp2: to-append and interpolated now; get rid of these; TODO: can be done less expensively
tmp1 = np.in1d(Merger_next['HaloIndex'][:], pack_inds(Merger_this['HaloIndex'][~eligibility_extrap_this], k))
tmp2 = np.in1d(Merger_next['HaloIndex'][:], pack_inds(Merger_this_info_lc['HaloIndex'][:], k))
tmp3 = ~(tmp1 & tmp2)
# if we found you in the interpolated halos in this redshift, you can't be allowed to be appended as part of Merger_next
Merger_next = Merger_next[tmp3]
del tmp1, tmp2, tmp3
# adding contributions from the previously loaded redshift
N_next_lc = len(Merger_next['HaloIndex'])
else:
N_next_lc = 0
else:
N_next_lc = 0
# total number of halos belonging to this light cone superslab and origin
N_lc = N_this_star_lc + N_this_noinfo_lc + N_next_lc
print("in this snapshot: interpolated, no info, next, total = ", N_this_star_lc * 100.0 / N_lc, N_this_noinfo_lc * 100.0 / N_lc, N_next_lc * 100.0 / N_lc, N_lc)
# save those arrays
Merger_lc = Table(
{'HaloIndex':np.zeros(N_lc, dtype=Merger_this_info_lc['HaloIndex'].dtype),
'InterpolatedVelocity': np.zeros(N_lc, dtype=(np.float32,3)),
'InterpolatedPosition': np.zeros(N_lc, dtype=(np.float32,3)),
'InterpolatedComoving': np.zeros(N_lc, dtype=np.float32)
}
)
# record interpolated position and velocity for those with info belonging to current redshift
Merger_lc['InterpolatedPosition'][:N_this_star_lc] = Merger_this_info_lc['InterpolatedPosition'][bool_star_this_info_lc]
Merger_lc['InterpolatedVelocity'][:N_this_star_lc] = Merger_this_info_lc['InterpolatedVelocity'][bool_star_this_info_lc]
Merger_lc['InterpolatedComoving'][:N_this_star_lc] = Merger_this_info_lc['InterpolatedComoving'][bool_star_this_info_lc]
Merger_lc['HaloIndex'][:N_this_star_lc] = Merger_this_info_lc['HaloIndex'][bool_star_this_info_lc]
# record interpolated position and velocity of the halos in the light cone without progenitor information
Merger_lc['InterpolatedPosition'][N_this_star_lc:N_this_star_lc+N_this_noinfo_lc] = Merger_this_noinfo_lc['Position']
Merger_lc['InterpolatedVelocity'][N_this_star_lc:N_this_star_lc+N_this_noinfo_lc] = np.zeros_like(Merger_this_noinfo_lc['Position'])
Merger_lc['InterpolatedComoving'][N_this_star_lc:N_this_star_lc+N_this_noinfo_lc] = Merger_this_noinfo_lc['ComovingDistance'] # assign comoving distance based on position; used to be np.ones(Merger_this_noinfo_lc['Position'].shape[0])*chi_this
Merger_lc['HaloIndex'][N_this_star_lc:N_this_star_lc+N_this_noinfo_lc] = Merger_this_noinfo_lc['HaloIndex']
del Merger_this_noinfo_lc
# pack halo indices for all halos but those in Merger_next
Merger_lc['HaloIndex'][:(N_this_star_lc + N_this_noinfo_lc)] = pack_inds(Merger_lc['HaloIndex'][:(N_this_star_lc + N_this_noinfo_lc)], k)
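                # (pack_inds is the inverse of unpack_inds above: it combines each local index
                # with its superslab number k back into a single global halo id)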
# record information from previously loaded redshift that was postponed
if i != ind_start or resume_flags[k, o]:
if N_next_lc != 0:
Merger_lc['InterpolatedPosition'][-N_next_lc:] = Merger_next['InterpolatedPosition'][:]
Merger_lc['InterpolatedVelocity'][-N_next_lc:] = Merger_next['InterpolatedVelocity'][:]
Merger_lc['InterpolatedComoving'][-N_next_lc:] = Merger_next['InterpolatedComoving'][:]
Merger_lc['HaloIndex'][-N_next_lc:] = Merger_next['HaloIndex'][:]
del Merger_next
resume_flags[k, o] = False
# offset position to make light cone continuous
Merger_lc['InterpolatedPosition'] = offset_pos(Merger_lc['InterpolatedPosition'], ind_origin = o, all_origins=origins)
# create directory for this redshift
os.makedirs(cat_lc_dir / ("z%.3f"%zname_this), exist_ok=True)
'''
_, inds = np.unique(Merger_lc['HaloIndex'], return_index=True)
print("UNIQUE overall = ", len(inds)*100./N_lc)
_, inds = np.unique(Merger_lc['HaloIndex'][:-N_next_lc], return_index=True)
print("UNIQUE without next = ", len(inds)*100./len(Merger_lc['HaloIndex'][:-N_next_lc]))
inds1 = np.arange(N_this_star_lc, dtype=int)
inds2 = np.arange(N_this_star_lc+N_this_noinfo_lc, N_lc, dtype=int)
inds3 = np.hstack((inds1, inds2))
_, inds = np.unique(Merger_lc['HaloIndex'][inds3], return_index=True)
print("UNIQUE info and next = ", len(inds)*100./len(Merger_lc['HaloIndex'][inds3]))
inds3 = np.arange(N_this_star_lc, N_lc, dtype=int)
_, inds = np.unique(Merger_lc['HaloIndex'][inds3], return_index=True)
print("UNIQUE noinfo and next = ", len(inds)*100./len(Merger_lc['HaloIndex'][inds3]))
if weirdness == 1:
quit()
weirdness += 1
'''
# write table with interpolated information
save_asdf(Merger_lc, ("Merger_lc%d.%02d"%(o,k)), header, cat_lc_dir / ("z%.3f"%zname_this))
# mask of the extrapolated halos
mask_extrap = (Merger_this_info_lc['InterpolatedComoving'] > chi_prev) | (Merger_this_info_lc['InterpolatedComoving'] < chi_this)
print("percentage extrapolated = ", np.sum(mask_extrap)*100./len(mask_extrap))
# TODO: Need to make sure no bugs with eligibility
# version 1: only the main progenitor is marked ineligible
# if halo belongs to this redshift catalog or the previous redshift catalog
eligibility_prev[Merger_prev_main_this_info_lc['HaloIndex'][~mask_extrap]] = False
eligibility_extrap_prev[Merger_prev_main_this_info_lc['HaloIndex'][mask_extrap]] = False
print("number eligible = ", np.sum(eligibility_prev), np.sum(eligibility_extrap_prev))
# version 2: all progenitors of halos belonging to this redshift catalog are marked ineligible
# run version 1 AND 2 to mark ineligible Merger_next objects to avoid multiple entries
# Note that some progenitor indices are zeros
# For best result perhaps combine Progs with MainProgs
if "Progenitors" in fields_mt:
nums = Merger_this_info_lc['NumProgenitors'][bool_star_this_info_lc]
starts = Merger_this_info_lc['StartProgenitors'][bool_star_this_info_lc]
# for testing purposes (remove in final version)
main_progs = Merger_this_info_lc['MainProgenitor'][bool_star_this_info_lc]
progs = mt_data_this['progenitors']['Progenitors']
halo_ind_prev = Merger_prev['HaloIndex']
N_halos_load = np.array([N_halos_slabs_prev[i] for i in inds_fn_prev])
                    slabs_prev_load = np.array([slabs_prev[i] for i in inds_fn_prev], dtype=np.int64)
offsets = np.zeros(len(inds_fn_prev), dtype=np.int64)
offsets[1:] = np.cumsum(N_halos_load)[:-1]
# mark ineligible the progenitors of the halos interpolated in this catalog
eligibility_prev = mark_ineligible(nums, starts, main_progs, progs, halo_ind_prev, eligibility_prev, offsets, slabs_prev_load)
print("number eligible after progenitors removal = ", np.sum(eligibility_prev), np.sum(eligibility_extrap_prev))
# information to keep for next redshift considered
N_next = np.sum(~bool_star_this_info_lc)
Merger_next = Table(
{'HaloIndex': np.zeros(N_next, dtype=Merger_lc['HaloIndex'].dtype),
'InterpolatedVelocity': np.zeros(N_next, dtype=(np.float32,3)),
'InterpolatedPosition': np.zeros(N_next, dtype=(np.float32,3)),
'InterpolatedComoving': np.zeros(N_next, dtype=np.float32)
}
)
Merger_next['HaloIndex'][:] = Merger_prev_main_this_info_lc['HaloIndex'][~bool_star_this_info_lc]
Merger_next['InterpolatedVelocity'][:] = Merger_this_info_lc['InterpolatedVelocity'][~bool_star_this_info_lc]
Merger_next['InterpolatedPosition'][:] = Merger_this_info_lc['InterpolatedPosition'][~bool_star_this_info_lc]
Merger_next['InterpolatedComoving'][:] = Merger_this_info_lc['InterpolatedComoving'][~bool_star_this_info_lc]
del Merger_this_info_lc, Merger_prev_main_this_info_lc
if plot:
# select the halos in the light cones
pos_choice = Merger_lc['InterpolatedPosition']
# selecting thin slab
pos_x_min = -Lbox/2.+k*(Lbox/n_superslabs)
                    pos_x_max = pos_x_min+(Lbox/n_superslabs)
ijk = 0
choice = (pos_choice[:, ijk] >= pos_x_min) & (pos_choice[:, ijk] < pos_x_max)
circle_this = plt.Circle(
(origins[0][1], origins[0][2]), radius=chi_this, color="g", fill=False
)
circle_prev = plt.Circle(
(origins[0][1], origins[0][2]), radius=chi_prev, color="r", fill=False
)
# clear things for fresh plot
ax = plt.gca()
ax.cla()
# plot particles
ax.scatter(pos_choice[choice, 1], pos_choice[choice, 2], s=0.1, alpha=1., color="dodgerblue")
# circles for in and prev
ax.add_artist(circle_this)
ax.add_artist(circle_prev)
                    plt.xlim([-Lbox/2., Lbox*1.5])
                    plt.ylim([-Lbox/2., Lbox*1.5])
plt.axis("equal")
plt.savefig('interp_%d_%d_%d.png'%(i, k, o))
#plt.show()
plt.close()
gc.collect()
# pack halo indices for the halos in Merger_next
offset = 0
for idx in inds_fn_prev:
print("k, idx = ",k,idx)
choice_idx = (offset <= Merger_next['HaloIndex'][:]) & (Merger_next['HaloIndex'][:] < offset+N_halos_slabs_prev[idx])
Merger_next['HaloIndex'][choice_idx] = pack_inds(Merger_next['HaloIndex'][choice_idx]-offset, idx)
offset += N_halos_slabs_prev[idx]
# split the eligibility array over three files for the three superslabs it's made up of
offset = 0
for idx in inds_fn_prev:
eligibility_prev_idx = eligibility_prev[offset:offset+N_halos_slabs_prev[idx]]
eligibility_extrap_prev_idx = eligibility_extrap_prev[offset:offset+N_halos_slabs_prev[idx]]
# combine current information with previously existing
if os.path.exists(cat_lc_dir / "tmp" / ("eligibility_prev_z%4.3f_lc%d.%02d.npy"%(z_prev, o, idx))):
eligibility_prev_old = np.load(cat_lc_dir / "tmp" / ("eligibility_prev_z%4.3f_lc%d.%02d.npy"%(z_prev, o, idx)))
eligibility_prev_idx = eligibility_prev_old & eligibility_prev_idx
eligibility_extrap_prev_old = np.load(cat_lc_dir / "tmp" / ("eligibility_extrap_prev_z%4.3f_lc%d.%02d.npy"%(z_prev, o, idx)))
eligibility_extrap_prev_idx = eligibility_extrap_prev_old & eligibility_extrap_prev_idx
print("Appending to existing eligibility file for %4.3f, %d, %02d!"%(z_prev, o, idx))
else:
print("First time seeing eligibility file for %4.3f, %d, %02d!"%(z_prev, o, idx))
np.save(cat_lc_dir / "tmp" / ("eligibility_prev_z%4.3f_lc%d.%02d.npy"%(z_prev, o, idx)), eligibility_prev_idx)
np.save(cat_lc_dir / "tmp" / ("eligibility_extrap_prev_z%4.3f_lc%d.%02d.npy"%(z_prev, o, idx)), eligibility_extrap_prev_idx)
offset += N_halos_slabs_prev[idx]
# write as table the information about halos that are part of next loaded redshift
save_asdf(Merger_next, ("Merger_next_z%4.3f_lc%d.%02d"%(z_prev, o, k)), header, cat_lc_dir / "tmp")
# save redshift of catalog that is next to load and difference in comoving between this and prev
with open(cat_lc_dir / "tmp" / "build.log", "a") as f:
f.writelines(["# Next iteration: \n", "z_prev = %.8f \n"%z_prev, "delta_chi = %.8f \n"%delta_chi, "light_cone = %d \n"%o, "super_slab = %d \n"%k])
del Merger_this, Merger_prev
# update values for difference in comoving distance
delta_chi_old = delta_chi
# dict_keys(['HaloIndex', 'HaloMass', 'HaloVmax', 'IsAssociated', 'IsPotentialSplit', 'MainProgenitor', 'MainProgenitorFrac', 'MainProgenitorPrec', 'MainProgenitorPrecFrac', 'NumProgenitors', 'Position', 'Progenitors'])
class ArgParseFormatter(argparse.RawDescriptionHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
pass
if __name__ == '__main__':
parser = argparse.ArgumentParser(description=__doc__, formatter_class=ArgParseFormatter)
parser.add_argument('--sim_name', help='Simulation name', default=DEFAULTS['sim_name'])
parser.add_argument('--z_start', help='Initial redshift where we start building the trees', type=float, default=DEFAULTS['z_start'])
parser.add_argument('--z_stop', help='Final redshift (inclusive)', type=float, default=DEFAULTS['z_stop'])
parser.add_argument('--merger_parent', help='Merger tree directory', default=(DEFAULTS['merger_parent']))
parser.add_argument('--catalog_parent', help='Light cone catalog directory', default=(DEFAULTS['catalog_parent']))
parser.add_argument('--superslab_start', help='Initial superslab where we start building the trees', type=int, default=DEFAULTS['superslab_start'])
parser.add_argument('--resume', help='Resume the calculation from the checkpoint on disk', action='store_true')
parser.add_argument('--plot', help='Want to show plots', action='store_true')
    parser.add_argument('--complete', help='Extend the interpolation of the z = 0.1 halos down to z = 0', action='store_true')
args = vars(parser.parse_args())
main(**args)