-
Notifications
You must be signed in to change notification settings - Fork 8
/
SConscript.simulation
executable file
·129 lines (115 loc) · 6.16 KB
/
SConscript.simulation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#! /usr/bin/env python
# -*- coding: utf-8 -*-
'''Simulate tree'''
import os
from nestly import Nest
from nestly.scons import SConsWrap
# None of the names below are defined in this file: they must be exported by
# the parent SConstruct/SConscript before this script is read.
Import('env tool_dict quick idlabel outdir naive random_naive mutability substitution lambda_list lambda0_list seed n N T nsim CommandRunner experimental_list naiveIDexp selection_param xarg buffarg uniform_mut')

# Build the nest rooted at `outdir`; `env` is handed to nestly as the alias environment.
nest = Nest()
w = SConsWrap(nest, outdir, alias_environment=env)

# Declared before any parameter level: filled by inner_aggregate and read by
# outer_aggregate, i.e. it gathers results across parameter values.
w.add_aggregate('outeragg', list)

# First nest levels: one per value of lambda, then one per value of lambda0.
w.add('lambda', lambda_list)
w.add('lambda0', lambda0_list)

# Aggregates declared below the parameter levels and above the 'runs' level.
# 'agg' is filled by validate and read by inner_aggregate (validation results
# of repeated simulations with the same parameters).
w.add_aggregate('agg', list)
# 'simstat_agg' is filled by gc_sim and read by simstat_aggregate (simulation
# stats with the same parameters).
w.add_aggregate('simstat_agg', list)
# 'summary_stats' is filled by gc_sim and read by the summary_stats target
# (summary stats for the CFT study, no abundance).
w.add_aggregate('summary_stats', list)

# Innermost level: plain simulation reruns with the same parameters, numbered 1..nsim.
w.add('runs', range(1, nsim+1))
@w.add_target()
def gc_sim(outdir, c):
    '''
    GCtree simulation

    Assemble the ``bin/simulator.py`` command line for the current nest leaf
    and register it through ``CommandRunner``.

    outdir -- output directory of the current nest leaf; all six targets are
              placed there
    c      -- nestly control dictionary; ``lambda``, ``lambda0`` and ``runs``
              are read, and the ``simstat_agg`` and ``summary_stats``
              aggregates are appended to

    Returns whatever ``CommandRunner`` returns for the targets; callers index
    it in the order fasta, stats tsv, collapsed tree pickle, colormap tsv,
    colormap pickle, log.

    NOTE: xvfb-run is needed because of issue https://github.com/etetoolkit/ete/issues/101
    '''
    # Optional flags: each expands to '' when the corresponding option is unset.
    idlabel_arg = ' --idlabel' if idlabel else ''
    T_arg = ' --obs_times {} '.format(' '.join(map(str, T))) if T else ''
    N_arg = ' --n_final_seqs {} '.format(N) if N is not None else ''
    # Truthiness (as for T) instead of `n != []`: None or an empty tuple now
    # also mean "no downsampling" rather than crashing or emitting a bare flag.
    n_arg = ' --n_to_downsample {} '.format(' '.join(map(str, n))) if n else ''
    # The run number is added to the base seed, so every rerun gets its own seed.
    seed_arg = ' --random_seed {} '.format(seed + c['runs']) if seed is not None else ''

    # Naive sequence: a sequence file when random_naive is given, else the literal sequence.
    if random_naive is not None:
        random_seq = ' --naive_seq_file {} '.format(random_naive)
    else:
        random_seq = ' --naive_seq {} '.format(naive)

    # Mutation model: 'none' for both files requests the uniform model.
    if uniform_mut:
        mut_sub = ' --mutability none --substitution none '
    else:
        mut_sub = ' --mutability {} --substitution {} '.format(mutability, substitution)

    # selection_param supplies the five values of the selection flags, in order.
    if selection_param is not None:
        sele_arg = ' --selection --target_distance {} --target_count {} --carry_cap {} --skip_update {} {}'.format(*selection_param)
    else:
        sele_arg = ''

    # Target order matters: later steps index into the returned list, and the
    # command redirects stdout to ${TARGETS[5]} (simulator.log).
    outputs = [os.path.join(outdir, 'GCsim.fasta'),
               os.path.join(outdir, 'GCsim_stats.tsv'),
               os.path.join(outdir, 'GCsim_collapsed_tree.p'),
               os.path.join(outdir, 'GCsim_collapsed_tree_colormap.tsv'),
               os.path.join(outdir, 'GCsim_collapsed_tree_colormap.p'),
               os.path.join(outdir, 'simulator.log')]

    command = ''.join([
        xarg, buffarg, 'python bin/simulator.py ', mut_sub,
        ' --outbase ', os.path.join(outdir, 'GCsim'),
        ' --lambda {0[lambda]} --lambda0 {0[lambda0]}'.format(c),
        idlabel_arg,
        T_arg,
        N_arg,
        n_arg,
        seed_arg,
        sele_arg,
        random_seq,
        ' > ${TARGETS[5]}',
    ])

    # The simulation has no source files, hence the empty source argument.
    tgt = CommandRunner(outputs, '', command)

    c['simstat_agg'].append(tgt[1])    # GCsim_stats.tsv
    c['summary_stats'].append(tgt[0])  # GCsim.fasta
    return tgt
@w.add_target()
def infer(outdir, c):
    '''Run the inference SConscript on the output of the simulation step.'''
    sim_targets = c['gc_sim']
    fasta = sim_targets[0]      # first simulation target: GCsim.fasta
    colormap = sim_targets[4]   # fifth simulation target: GCsim_collapsed_tree_colormap.p
    # The simulation routine names the naive sequence 'naive'.
    naiveID = 'naive'
    # Simulated data has no fasta formatting quirks, so no converter is needed.
    converter = None
    # The names in this list are resolved in the calling scope, so the local
    # variables above must keep exactly these names.
    exported_names = 'env tool_dict quick idlabel fasta outdir naiveID converter CommandRunner xarg buffarg colormap mutability substitution'
    return SConscript('SConscript.inference', exports=exported_names)
@w.add_target()
def validate(outdir, c):
    '''Run bin/validation.py on the simulation outputs and the inference results.'''
    # validation.tsv compares the different methods; validation.log receives stdout.
    names = ['validation.tsv', 'validation.log']
    if tool_dict['gctree']:
        # special gctree output evaluating the parsimony tree ranking
        names.append('validation_gctree.tsv')
    outputs = [os.path.join(outdir, name) for name in names]

    # Sources: simulation targets 2 and 3 (collapsed tree pickle and colormap
    # tsv), kept as one nested slice, followed by the first target of every
    # inference result except the first one (that one is the phylip run).
    sources = [c['gc_sim'][2:4]]
    for result in c['infer'][1:]:
        sources.append(result[0])

    outbase = os.path.join(outdir, 'validation')
    command = xarg + buffarg + 'python bin/validation.py $SOURCES --outbase ' + outbase + ' > ${TARGETS[1]}'

    tgt = CommandRunner(outputs, sources, command)
    c['agg'].append(tgt[0])
    return tgt
# Step back out of the 'runs' level: the targets defined below are created
# once per (lambda, lambda0) combination instead of once per simulation run.
w.pop('runs')
@w.add_target()
def simstat_aggregate(outdir, c):
    '''Run bin/simstatagg.py over the simulation stats collected in c['simstat_agg'].'''
    log_file = os.path.join(outdir, 'simulation_stats.log')
    outbase = os.path.join(outdir, 'simulation.stats')
    # stdout of the script is redirected into the only target, the log file
    command = (buffarg + 'python bin/simstatagg.py $SOURCES --outbase ' + outbase +
               ' --experimental sequence_data/150228_Clone_3-8.fasta > ${TARGETS[0]}')
    tgt = env.Command([log_file], c['simstat_agg'], command)
    # mark the target to be rebuilt on every invocation
    env.AlwaysBuild(tgt)
    return tgt
@w.add_target()
def summary_stats(outdir, c):
    '''Run bin/summary_stats.py over the fasta files collected in c['summary_stats'] (CFT version).'''
    # NOTE(review): this target has the same name as the 'summary_stats'
    # aggregate it reads -- confirm nestly tolerates the duplicate key.
    if len(experimental_list) > 0:
        exp_arg = ' --experimental ' + ' '.join(experimental_list)
    else:
        exp_arg = ''

    log_file = os.path.join(outdir, 'simulation_summary_stats.log')
    outbase = os.path.join(outdir, 'simulation_summary_stats')
    # stdout of the script is redirected into the only target, the log file
    command = (buffarg + 'python bin/summary_stats.py $SOURCES --outbase ' + outbase +
               exp_arg +
               ' --naiveIDexp ' + naiveIDexp + ' > ${TARGETS[0]}')

    tgt = env.Command([log_file], c['summary_stats'], command)
    # mark the target to be rebuilt on every invocation
    env.AlwaysBuild(tgt)
    return tgt
@w.add_target()
def inner_aggregate(outdir, c):
    '''Run bin/validaggreg_compare.py over the validation results collected in c['agg'].'''
    outbase = os.path.join(outdir, 'validaggreg')
    outputs = [os.path.join(outdir, name) for name in ('validaggreg.tsv', 'validaggreg.log')]
    # stdout of the script goes to the second target, the log file
    command = buffarg + 'python bin/validaggreg_compare.py $SOURCES --outbase ' + outbase + ' > ${TARGETS[1]}'
    tgt = env.Command(outputs, c['agg'], command)
    # mark the targets to be rebuilt on every invocation
    env.AlwaysBuild(tgt)
    # hand the tsv on to the aggregation across parameter values
    c['outeragg'].append(tgt[0])
    return tgt
# Step back out of the 'lambda' level (and the 'lambda0' level nested in it):
# the target defined below is created once for the whole nest.
w.pop('lambda')
@w.add_target()
def outer_aggregate(outdir, c):
    '''Run bin/aggregate_compare.py over the per-parameter results collected in c['outeragg'].'''
    outbase = os.path.join(outdir, 'aggregation')
    outputs = [os.path.join(outdir, name) for name in ('aggregation.tsv', 'aggregation.log')]
    # stdout of the script goes to the second target, the log file
    command = buffarg + 'python bin/aggregate_compare.py $SOURCES --outbase ' + outbase + ' > ${TARGETS[1]}'
    tgt = env.Command(outputs, c['outeragg'], command)
    # mark the targets to be rebuilt on every invocation
    env.AlwaysBuild(tgt)
    return tgt