-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile_tf
120 lines (110 loc) · 3.02 KB
/
Snakefile_tf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
configfile: "config.yaml"
rule all:
input:
"read_stats/nanostat.out",
"read_sample/read.sample.fastq",
"data/reads/reads.fa",
"nanosim/read_analysis.out",
"nanosim/simulator.out",
"minimap2/simulated_reads.genome.aln.sam",
"ngmlr/simulated_reads.genome.aln.sam"
rule read_stats:
input:
fa="data/genome/genome.fa.gz",
fq="data/reads/reads.fastq.gz"
output:
"read_stats/nanostat.out"
threads:
4
resources:
mem_mb=8
log:
"logs/read_stats/nanostat.log"
shell:
"NanoStat --fastq {input.fq} -t {threads} > {output} 2> {log}"
rule read_sample:
input:
"data/reads/reads.fastq.gz"
output:
"read_sample/read.sample.fastq"
threads:
1
params:
nbreads=config["nbreads"]
log:
"logs/read_sample/seqtk.sample.log"
shell:
"seqtk sample -s$RANDOM {input} {params.nbreads} > {output} 2> {log}"
rule convert:
input:
"data/reads/reads.fastq.gz"
output:
"data/reads/reads.fa"
threads:
1
log:
"logs/convert/seqtk.convert.log"
shell:
"seqtk seq -a {input} > {output} 2> {log}"
rule read_analysis:
input:
reads="data/reads/reads.fa",
genome="data/genome/genome.fa.gz"
output:
out1="nanosim/read_analysis.out",
out2="nanosim/training"
threads:
1
log:
"logs/nanosim/read_analysis.log"
shell:
"read_analysis.py -i {input.reads} -r {input.genome} -o {output.out2} > {output.out1} 2> {log}"
rule unzip:
input:
genome="data/genome/genome.fa.gz"
output:
temp("unzip/genome.fa")
shell:
"gunzip {input.genome} -c > {output}"
rule simulator:
input:
genome="unzip/genome.fa",
readanal="nanosim/read_analysis.out",
train="nanosim/training"
output:
nanoout=temp("nanosim/{sample}_simulated.out"),
fastaout="nanosim/{sample}_reads.fa.gz"
log:
"logs/nanosim/simulator.log"
shell:
"simulator.py linear -r {input.genome} -n 10000 -c {input.train} "
"-o {output.nanoout} > {output.nanoout}.log 2> {log};"
"gzip {output.out2}_reads.fasta -c > {output.fastaout};"
rule minimap2:
input:
genome="data/genome/genome.fa.gz",
reads="nanosim/simulated_reads.fa.gz"
output:
"minimap2/simulated_reads.genome.aln.sam"
threads:
4
resources:
mem_mb=30
log:
"logs/minimap2/simulated_reads.genome.aln.log"
shell:
"minimap2 -t {threads} -ax map-ont {input.genome} {input.reads} > {output} 2> {log}"
rule ngmlr:
input:
genome="data/genome/genome.fa.gz",
reads="nanosim/simulated_reads.fa.gz"
output:
"ngmlr/simulated_reads.genome.aln.sam"
threads:
4
resources:
mem_mb=30
log:
"logs/ngmlr/simulated_reads.genome.aln.log"
shell:
"ngmlr -t {threads} -r {input.genome} -q {input.reads} -o {output} -x ont 2> {log}"