From fc40193f681655422d81e90aadf69a84dc027981 Mon Sep 17 00:00:00 2001
From: ryanjameskennedy <ryanjameskennedy@icloud.com>
Date: Tue, 12 Nov 2024 16:53:29 +0100
Subject: [PATCH 1/4] Fix .first() bug

---
 workflows/bacterial_base.nf | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/workflows/bacterial_base.nf b/workflows/bacterial_base.nf
index 4b6e654f..55ff0091 100644
--- a/workflows/bacterial_base.nf
+++ b/workflows/bacterial_base.nf
@@ -26,6 +26,7 @@ workflow CALL_BACTERIAL_BASE {
         referenceGenome
         referenceGenomeDir
         inputSamples
+        targetSampleSize
     
     main:
         ch_versions = Channel.empty()
@@ -42,11 +43,12 @@ workflow CALL_BACTERIAL_BASE {
             .map{ row -> get_reads(row) }
             .set{ ch_raw_reads }
 
-        // downsample reads
-        seqtk_sample( ch_raw_reads.map(row -> [row[0], row[1], params.targetSampleSize]) ).reads
-            .concat( ch_raw_reads )    // add raw reads channel
-            .first()                   // if seqtk was not run the first row is the raw reads
-            .set{ ch_sampled_reads }  // create sampled reads channel
+        if ( params.targetSampleSize ) {
+            // downsample reads
+            seqtk_sample( ch_raw_reads, targetSampleSize ).reads.set{ ch_sampled_reads }
+        } else {
+            ch_raw_reads.set{ ch_sampled_reads }
+        }
 
         // reads trim and clean and recreate reads channel if the reads were trimmed
         assembly_trim_clean(ch_sampled_reads.join(ch_meta)).set { ch_clean_reads_w_meta }

From 38db9eceba09bb47446e00bc88ec19e2ff2d0b9b Mon Sep 17 00:00:00 2001
From: ryanjameskennedy <ryanjameskennedy@icloud.com>
Date: Tue, 12 Nov 2024 16:55:11 +0100
Subject: [PATCH 2/4] Make targetSampleSize its own channel in seqtk_sample

---
 nextflow-modules/modules/seqtk/main.nf  | 3 ++-
 workflows/escherichia_coli.nf           | 4 +++-
 workflows/klebsiella_pneumoniae.nf      | 4 +++-
 workflows/mycobacterium_tuberculosis.nf | 4 +++-
 workflows/staphylococcus_aureus.nf      | 4 +++-
 workflows/streptococcus.nf              | 3 ++-
 workflows/streptococcus_pyogenes.nf     | 3 ++-
 7 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/nextflow-modules/modules/seqtk/main.nf b/nextflow-modules/modules/seqtk/main.nf
index 6ba8602c..80e85b97 100644
--- a/nextflow-modules/modules/seqtk/main.nf
+++ b/nextflow-modules/modules/seqtk/main.nf
@@ -3,7 +3,8 @@ process seqtk_sample {
   scratch params.scratch
 
   input:
-    tuple val(sampleID), path(reads), val(sample_size)
+    tuple val(sampleID), path(reads)
+    val sample_size
 
   output:
     tuple val(sampleID), path("*.fastq.gz"), emit: reads
diff --git a/workflows/escherichia_coli.nf b/workflows/escherichia_coli.nf
index a6a58f11..84f34e41 100644
--- a/workflows/escherichia_coli.nf
+++ b/workflows/escherichia_coli.nf
@@ -45,11 +45,13 @@ workflow CALL_ESCHERICHIA_COLI {
     serotypefinderDb = file(params.serotypefinderDb, checkIfExists: true)
     shigapassDb = file(params.shigapassDb, checkIfExists: true)
     virulencefinderDb = file(params.virulencefinderDb, checkIfExists: true)
+    // schemas and values
+    targetSampleSize = params.targetSampleSize ? params.targetSampleSize : Channel.of([])
 
     main:
         ch_versions = Channel.empty()
 
-        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples )
+        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize )
         
         CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly}
         CALL_BACTERIAL_BASE.out.reads.set{ch_reads}
diff --git a/workflows/klebsiella_pneumoniae.nf b/workflows/klebsiella_pneumoniae.nf
index ce5e357b..00ed38bc 100644
--- a/workflows/klebsiella_pneumoniae.nf
+++ b/workflows/klebsiella_pneumoniae.nf
@@ -43,11 +43,13 @@ workflow CALL_KLEBSIELLA_PNEUMONIAE {
     pointfinderDb = file(params.pointfinderDb, checkIfExists: true)
     serotypefinderDb = file(params.serotypefinderDb, checkIfExists: true)
     virulencefinderDb = file(params.virulencefinderDb, checkIfExists: true)
+    // schemas and values
+    targetSampleSize = params.targetSampleSize ? params.targetSampleSize : Channel.of([])
 
     main:
         ch_versions = Channel.empty()
 
-        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples )
+        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize )
         
         CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly}
         CALL_BACTERIAL_BASE.out.reads.set{ch_reads}
diff --git a/workflows/mycobacterium_tuberculosis.nf b/workflows/mycobacterium_tuberculosis.nf
index f9c934e6..df48d4a0 100644
--- a/workflows/mycobacterium_tuberculosis.nf
+++ b/workflows/mycobacterium_tuberculosis.nf
@@ -33,11 +33,13 @@ workflow CALL_MYCOBACTERIUM_TUBERCULOSIS {
     tbdbBed = file(params.tbdbBed, checkIfExists: true)
     tbdbBedIdx = file(params.tbdbBedIdx, checkIfExists: true)
     tbGradingRulesBed = file(params.tbGradingRulesBed, checkIfExists: true)
+    // schemas and values
+    targetSampleSize = params.targetSampleSize ? params.targetSampleSize : Channel.of([])
 
     main:
         ch_versions = Channel.empty()
 
-        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples )
+        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize )
 
         CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly}
         CALL_BACTERIAL_BASE.out.reads.set{ch_reads}
diff --git a/workflows/staphylococcus_aureus.nf b/workflows/staphylococcus_aureus.nf
index fd1a6fba..562d6197 100644
--- a/workflows/staphylococcus_aureus.nf
+++ b/workflows/staphylococcus_aureus.nf
@@ -41,11 +41,13 @@ workflow CALL_STAPHYLOCOCCUS_AUREUS {
     resfinderDb = file(params.resfinderDb, checkIfExists: true)
     pointfinderDb = file(params.pointfinderDb, checkIfExists: true)
     virulencefinderDb = file(params.virulencefinderDb, checkIfExists: true)
+    // schemas and values
+    targetSampleSize = params.targetSampleSize ? params.targetSampleSize : Channel.of([])
 
     main:
         ch_versions = Channel.empty()
 
-        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples )
+        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize )
         
         CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly}
         CALL_BACTERIAL_BASE.out.reads.set{ch_reads}
diff --git a/workflows/streptococcus.nf b/workflows/streptococcus.nf
index 031f62bb..4e258cdb 100644
--- a/workflows/streptococcus.nf
+++ b/workflows/streptococcus.nf
@@ -53,11 +53,12 @@ workflow CALL_STREPTOCOCCUS {
     mlstScheme = params.mlstScheme ? params.mlstScheme : Channel.of([])
     species = params.species ? params.species : Channel.of([])
     speciesDir = params.speciesDir ? params.speciesDir : Channel.of([])
+    targetSampleSize = params.targetSampleSize ? params.targetSampleSize : Channel.of([])
 
     main:
         ch_versions = Channel.empty()
 
-        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples )
+        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize )
         
         CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly}
         CALL_BACTERIAL_BASE.out.reads.set{ch_reads}
diff --git a/workflows/streptococcus_pyogenes.nf b/workflows/streptococcus_pyogenes.nf
index 35ccb290..e9af0367 100644
--- a/workflows/streptococcus_pyogenes.nf
+++ b/workflows/streptococcus_pyogenes.nf
@@ -47,11 +47,12 @@ workflow CALL_STREPTOCOCCUS_PYOGENES {
     mlstScheme = params.mlstScheme ? params.mlstScheme : Channel.of([])
     species = params.species ? params.species : Channel.of([])
     speciesDir = params.speciesDir ? params.speciesDir : Channel.of([])
+    targetSampleSize = params.targetSampleSize ? params.targetSampleSize : Channel.of([])
 
     main:
         ch_versions = Channel.empty()
 
-        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples )
+        CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize )
         
         CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly}
         CALL_BACTERIAL_BASE.out.reads.set{ch_reads}

From e274208db4586c14be3d1b018984d18f97a3fc0a Mon Sep 17 00:00:00 2001
From: ryanjameskennedy <ryanjameskennedy@icloud.com>
Date: Tue, 12 Nov 2024 16:56:14 +0100
Subject: [PATCH 3/4] Add symlinkDir to dev config spyogenes

---
 configs/nextflow.dev.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configs/nextflow.dev.config b/configs/nextflow.dev.config
index d8154a68..08c8fa79 100644
--- a/configs/nextflow.dev.config
+++ b/configs/nextflow.dev.config
@@ -111,6 +111,7 @@ profiles {
 		params.species = 'streptococcus pyogenes'
 		params.speciesDir = 'spyogenes'
 		params.mlstScheme = 'spyogenes'
+		params.symlinkDir = "/access/jasen/spyogenes/"
 		params.referenceGenome = "${params.root}/assets/genomes/streptococcus_pyogenes/GCF_000006785.2.fasta"
 		params.referenceGenomeIdx = "${params.root}/assets/genomes/streptococcus_pyogenes/GCF_000006785.2.fasta.fai"
 		params.referenceGenomeGff = "${params.root}/assets/genomes/streptococcus_pyogenes/GCF_000006785.2.gff"

From e3cab12697e97cf69808ea15367ebf515e277b71 Mon Sep 17 00:00:00 2001
From: ryanjameskennedy <ryanjameskennedy@icloud.com>
Date: Tue, 12 Nov 2024 16:57:05 +0100
Subject: [PATCH 4/4] Update CHANGELOG.md re .first() bug fix

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 51149023..eb08ca31 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed resfinder `--species` arg
 - Fixed `nextflow.hopper.config` `symlinkDir`
 - Removed serotypefinder from saureus workflow
+- Fixed jasen processing only the first row/sample of the input CSV instead of every sample
 
 ### Changed