nf-core · sofstam · Oct 10, 2024 · Oct 14, 2024 · Oct 14, 2024 · Oct 14, 2024
diff --git a/docs/output.md b/docs/output.md
@@ -758,7 +758,9 @@ The pipeline can also generate input files for the following downstream pipeline
 <summary>Output files</summary>
 
 - `downstream_samplesheets/`
-  - `mag.csv`: input sheet for that contains paths to preprocessed FASTQs (corresponding to what is saved with `--save_analysis_ready_fastqs`) that can be used to skip read preprocessing steps in nf-core/mag
+  - `mag-{pe,se}.csv`: input sheets for paired-end and single-end reads, respectively, that contain paths to preprocessed short-read FASTQs (corresponding to what is saved with `--save_analysis_ready_fastqs`) and can be used to skip read preprocessing steps in nf-core/mag.
+    - Note: if you merge reads, these will be listed in `mag-se.csv`.
+    - Note: the nf-core/mag mandatory `group` column is filled with a dummy ID (`0`); you may wish to change this depending on your nf-core/mag settings!
 
 </details>
 

diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf
@@ -7,64 +7,62 @@ workflow SAMPLESHEET_MAG {
     ch_processed_reads
 
     main:
-    format     = 'csv' // most common format in nf-core
-    format_sep = ','
+    format = 'csv'
 
-    ch_list_for_samplesheet = ch_processed_reads.view()
-            .map {
-                        meta, sample_id, instrument_platform,fastq_1,fastq_2,fasta ->
-                            def sample        = meta.id
-                            def run           = meta.run_accession  //this should be optional
-                            def group         = "" 
-                            def short_reads_1 = file(params.outdir).toString() + '/' + meta.id + '/' + fastq_1.getName()
-                            def short_reads_2 = meta.single_end ? "" : file(params.outdir).toString() + '/' + meta.id + '/' + fastq_2.getName()
-                            def long_reads    = meta.is_fasta ? file(params.outdir).toString() + '/' + meta.id + '/' + fasta.getName() : ""
-                [sample: sample, run: run, group: group, short_reads_1: short_reads_1, short_reads_2: short_reads_2, long_reads: long_reads]
+    ch_list_for_samplesheet = ch_processed_reads
+        .dump()
+        .map { meta, reads ->
+            def sample = meta.id
+            def run = params.perform_runmerging ? '' : meta.run_accession
+            def group = "0"
+            // TODO: the `run` value should be optional
+            def short_reads_1 = meta.is_fasta ? "" : file(params.outdir).toString() + '/analysis_ready_fastqs/' + reads[0].getName()
+            def short_reads_2 = meta.is_fasta || meta.single_end ? "" : file(params.outdir).toString() + '/analysis_ready_fastqs/' + reads[1].getName()
+            def long_reads = meta.is_fasta ? file(params.outdir).toString() + '/analysis_ready_fastqs/' + reads[0].getName() : ""
+
+            [sample: sample, run: run, group: group, short_reads_1: short_reads_1, short_reads_2: short_reads_2, long_reads: long_reads]
         }
-        .view()
-        .tap{ ch_list_for_samplesheet_all }
-        .filter{ it.short_reads_1 != "" }
-        .branch{
+        .tap { ch_list_for_samplesheet_all }
+        .filter { it.short_reads_1 != "" }
+        .branch {
             se: it.short_reads_2 == ""
-            pe: true
-    }
+            pe: it.short_reads_2 != ""
+            unknown: true
+        }
 
     // Throw a warning that only long reads are not supported yet by MAG
     ch_list_for_samplesheet_all
-        .filter{ it.long_reads != "" && it.short_reads_1 == "" }
-        .collect{ log.warn("[nf-core/taxprofiler] WARNING: Standalone long reads are not yet supported by the nf-core/mag pipeline and will not be in present in `mag-*.csv`. Sample: ${it.sample}" )}
+        .filter { it.long_reads != "" && it.short_reads_1 == "" }
+        .collect { log.warn("[nf-core/taxprofiler] WARNING: Standalone long reads are not yet supported by the nf-core/mag pipeline and will not be in present in `mag-*.csv`. Sample: ${it.sample}") }
 
-    channelToSamplesheet(ch_list_for_samplesheet.pe,"${params.outdir}/downstream_samplesheets/mag-pe", format)
+    channelToSamplesheet(ch_list_for_samplesheet.pe, "${params.outdir}/downstream_samplesheets/mag-pe", format)
     channelToSamplesheet(ch_list_for_samplesheet.se, "${params.outdir}/downstream_samplesheets/mag-se", format)
-
 }
 
 workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
-
     take:
     ch_processed_reads
 
     main:
     def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",")
 
-    if ( downstreampipeline_names.contains('mag') && params.save_analysis_ready_fastqs) {
+    if (downstreampipeline_names.contains('mag') && params.save_analysis_ready_fastqs) {
         SAMPLESHEET_MAG(ch_processed_reads)
     }
-
 }
 
 // Constructs the header string and then the strings of each row, and
 def channelToSamplesheet(ch_list_for_samplesheet, path, format) {
-    format_sep = ["csv":",", "tsv":"\t", "txt":"\t"][format]
+    def format_sep = [csv: ",", tsv: "\t", txt: "\t"][format]
 
-    ch_header = ch_list_for_samplesheet
+    def ch_header = ch_list_for_samplesheet
 
     ch_header
         .first()
-        .map{ it.keySet().join(format_sep) }
-        .concat( ch_list_for_samplesheet.map{ it.values().join(format_sep) })
+        .map { it.keySet().join(format_sep) }
+        .concat(ch_list_for_samplesheet.map { it.values().join(format_sep) })
         .collectFile(
-            name:"${path}.${format}",
+            name: "${path}.${format}",
             newLine: true,
             sort: false
         )

diff --git a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf
@@ -149,7 +149,17 @@ workflow PIPELINE_COMPLETION {
 //
 def validateInputParameters() {
     genomeExistsError()
-}//
+
+    if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) {
+        error('[nf-core/taxprofiler] ERROR: If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets`! Check input.')
+    }
+
+    if ( params.generate_downstream_samplesheets && params.generate_pipeline_samplesheets.split(",").contains('mag') && !params.save_analysis_ready_fastqs ) {
+        error("[nf-core/taxprofiler] ERROR: To generate downstream samplesheets for nf-core/mag, you must also specify `--save_analysis_ready_fastqs`")
+    }
+}
+
+//
 // Validate channels from input samplesheet
 //
 def validateInputSamplesheet(input) {

diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
@@ -334,7 +334,7 @@ workflow TAXPROFILER {
     // Samplesheet generation
     //
     if ( params.generate_downstream_samplesheets ) {
-        GENERATE_DOWNSTREAM_SAMPLESHEETS ( samplesheet )
+        GENERATE_DOWNSTREAM_SAMPLESHEETS ( ch_reads_runmerged )
     }
 
     //