-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.nf
149 lines (109 loc) · 4.01 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env nextflow
// Parse input data: the sample sheet must be a '.csv' file.
if (has_extension(params.input, ".csv")) {
    // checkIfExists makes the run fail early when the sample sheet is missing
    csv_file = file(params.input, checkIfExists: true)
    ch_input = extract_data(csv_file)
} else {
    exit 1, "error: The sample input file must have the extension '.csv'."
}

// Stage input data: split the sample channel into two identical copies,
// one for the QC branch and one for the raw reads.
(ch_qc_reads, ch_raw_reads) = ch_input.into(2)

// The TX process only runs when no transcriptome is supplied and a FASTA is
// given, so the genome references are optional. Calling file() on an unset
// parameter aborts the run, hence the guards below.
if (!params.transcriptome && params.fasta && !params.gtf) {
    exit 1, "error: params.gtf must be provided to build a transcriptome from params.fasta."
}
ch_fasta = params.fasta ? Channel.value(file(params.fasta)) : Channel.empty()
ch_gtf = params.gtf ? Channel.value(file(params.gtf)) : Channel.empty()
process FASTQC {
    // Label each task with its sample identifier
    tag "${base}"

    // Reports go under <outdir>/fastqc only when params.save_qc_intermediates
    // is set; otherwise the closure yields null for every file.
    publishDir params.outdir, mode: 'copy',
        saveAs: { filename -> params.save_qc_intermediates ? "fastqc/${filename}" : null }

    // Entire process is skipped unless QC was requested
    when:
    params.run_qc

    input:
    tuple val(base), file(reads) from ch_qc_reads

    // Emits the sample id together with every FastQC HTML/ZIP report produced
    output:
    tuple val(base), file("*.{html,zip}") into ch_multiqc

    script:
    """
    fastqc -q ${reads}
    """
}
process MULTIQC {
    // The aggregated report is always published under <outdir>/multiqc
    publishDir "${params.outdir}/multiqc", mode: 'copy'

    // Skipped unless QC was requested
    when:
    params.run_qc

    // ch_multiqc emits (sample id, reports) tuples. collect() flattens nested
    // lists, so the sample id is dropped first; otherwise the id strings would
    // be handed to this file input alongside the actual reports.
    input:
    file(htmls) from ch_multiqc.map { it[1] }.collect()

    output:
    file("multiqc_report.html") into multiqc_out

    // MultiQC scans the task directory, where the collected reports are staged
    script:
    """
    multiqc .
    """
}
process TX {
    // The generated transcriptome is published under
    // <outdir>/reference/transcriptome only when params.save_transcriptome is
    // set; otherwise the closure yields null.
    publishDir params.outdir, mode: 'copy',
        saveAs: { filename -> params.save_transcriptome ? "reference/transcriptome/${filename}" : null }

    // Only build a transcriptome when none was supplied and a genome FASTA exists
    when:
    !params.transcriptome && params.fasta

    input:
    file(fasta) from ch_fasta
    file(gtf) from ch_gtf

    // Output is named after the genome FASTA's base name
    output:
    file("${fasta.baseName}.tx.fa") into transcriptome_created

    script:
    """
    gffread -F -w "${fasta.baseName}.tx.fa" -g ${fasta} ${gtf}
    """
}
// Prefer a user-supplied transcriptome; otherwise fall back to the TX process output
if (params.transcriptome) {
    ch_transcriptome = Channel.value(file(params.transcriptome))
} else {
    ch_transcriptome = transcriptome_created
}
process INDEX {
    // The index is published under <outdir>/reference/index only when
    // params.save_index is set; otherwise the closure yields null.
    publishDir params.outdir, mode: 'copy',
        saveAs: { filename -> params.save_index ? "reference/index/${filename}" : null }

    // Skip index construction when a prebuilt kallisto index was supplied
    when:
    !params.kallisto_index

    input:
    file(tx) from ch_transcriptome

    output:
    file("*.idx") into index_created

    // The index file is named after the transcriptome's simple name
    script:
    """
    kallisto index -i ${tx.simpleName}.idx ${tx}
    """
}
// Prefer a user-supplied kallisto index; otherwise fall back to the INDEX process output
if (params.kallisto_index) {
    ch_index = Channel.value(file(params.kallisto_index))
} else {
    ch_index = index_created
}
/*
================================================================================
AUXILIARY FUNCTIONS
================================================================================
*/
// Verify that a row holds exactly `number` items.
// Returns true on a match; otherwise calls exit with an error naming the row.
def checkNumberOfItem(row, number) {
    def item_count = row.size()
    if (item_count == number) {
        return true
    }
    exit 1, "error: Invalid CSV input - malformed row (e.g. missing column) in ${row}, consult documentation."
    return true
}
// Resolve `it` to a file object and hand it back.
// Calls exit with an error naming the path when nothing exists at that location.
def return_file(it) {
    def candidate = file(it)
    if (!candidate.exists()) {
        exit 1, "error: Cannot find supplied FASTQ input file. Check file: ${it}"
    }
    return candidate
}
// Case-insensitive suffix test: true when the string form of `it` ends with `extension`.
def has_extension(it, extension) {
    def lowered_name = it.toString().toLowerCase()
    def lowered_suffix = extension.toLowerCase()
    return lowered_name.endsWith(lowered_suffix)
}
// Parse the samples CSV file into a channel of [ sample_id, [read1, read2] ] tuples.
// The header must contain the columns 'Sample_ID', 'Read1' and 'Read2'. Each row is
// validated (column names, item count, non-empty fields, existing files, FASTQ
// extension) and the run is aborted via exit on the first violation.
def extract_data(csvFile){
    // Suffixes accepted as FASTQ files; has_extension compares case-insensitively
    def fastq_suffixes = ["fastq.gz", "fq.gz", "fastq", "fq"]

    Channel
        .fromPath(csvFile)
        .splitCsv(header: true, sep: ',')
        .map{ row ->
            def expected_keys = ["Sample_ID", "Read1", "Read2"]
            if(!row.keySet().containsAll(expected_keys)) exit 1, "error: Invalid CSV input - malformed column names. Please use the column names 'Sample_ID', 'Read1', 'Read2'."
            checkNumberOfItem(row, 3)

            // Reject empty fields on the raw values, before they are turned into
            // paths (a resolved path never compares equal to an empty string).
            if( !row.Sample_ID || !row.Read1 || !row.Read2 ) exit 1, "error: a field does not contain any information. Please check your CSV file"

            def samples = row.Sample_ID
            // 'NA' is kept as a literal placeholder; anything else must be an existing file
            def read1 = row.Read1.matches('NA') ? 'NA' : return_file(row.Read1)
            def read2 = row.Read2.matches('NA') ? 'NA' : return_file(row.Read2)

            // NOTE(review): an 'NA' placeholder does not carry a FASTQ suffix and is
            // therefore still rejected by the checks below - confirm whether rows
            // with a missing mate are meant to be accepted.
            if( !fastq_suffixes.any{ suffix -> has_extension(read1, suffix) } ) exit 1, "error: A R1 file has a non-recognizable FASTQ extension. Check: ${read1}"
            if( !fastq_suffixes.any{ suffix -> has_extension(read2, suffix) } ) exit 1, "error: A R2 file has a non-recognizable FASTQ extension. Check: ${read2}"

            // output tuple mimicking fromFilePairs
            [ samples, [read1, read2] ]
        }
}