From fc40193f681655422d81e90aadf69a84dc027981 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 12 Nov 2024 16:53:29 +0100 Subject: [PATCH 1/4] Fix .first() bug --- workflows/bacterial_base.nf | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/workflows/bacterial_base.nf b/workflows/bacterial_base.nf index 4b6e654f..55ff0091 100644 --- a/workflows/bacterial_base.nf +++ b/workflows/bacterial_base.nf @@ -26,6 +26,7 @@ workflow CALL_BACTERIAL_BASE { referenceGenome referenceGenomeDir inputSamples + targetSampleSize main: ch_versions = Channel.empty() @@ -42,11 +43,12 @@ workflow CALL_BACTERIAL_BASE { .map{ row -> get_reads(row) } .set{ ch_raw_reads } - // downsample reads - seqtk_sample( ch_raw_reads.map(row -> [row[0], row[1], params.targetSampleSize]) ).reads - .concat( ch_raw_reads ) // add raw reads channel - .first() // if seqtk was not run the first row is the raw reads - .set{ ch_sampled_reads } // create sampled reads channel + if ( params.targetSampleSize ) { + // downsample reads + seqtk_sample( ch_raw_reads, targetSampleSize ).reads.set{ ch_sampled_reads } + } else { + ch_raw_reads.set{ ch_sampled_reads } + } // reads trim and clean and recreate reads channel if the reads were trimmed assembly_trim_clean(ch_sampled_reads.join(ch_meta)).set { ch_clean_reads_w_meta } From 38db9eceba09bb47446e00bc88ec19e2ff2d0b9b Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 12 Nov 2024 16:55:11 +0100 Subject: [PATCH 2/4] Make targetSampleSize its own channel in seqtk_sample --- nextflow-modules/modules/seqtk/main.nf | 3 ++- workflows/escherichia_coli.nf | 4 +++- workflows/klebsiella_pneumoniae.nf | 4 +++- workflows/mycobacterium_tuberculosis.nf | 4 +++- workflows/staphylococcus_aureus.nf | 4 +++- workflows/streptococcus.nf | 3 ++- workflows/streptococcus_pyogenes.nf | 3 ++- 7 files changed, 18 insertions(+), 7 deletions(-) diff --git a/nextflow-modules/modules/seqtk/main.nf b/nextflow-modules/modules/seqtk/main.nf index 
6ba8602c..80e85b97 100644 --- a/nextflow-modules/modules/seqtk/main.nf +++ b/nextflow-modules/modules/seqtk/main.nf @@ -3,7 +3,8 @@ process seqtk_sample { scratch params.scratch input: - tuple val(sampleID), path(reads), val(sample_size) + tuple val(sampleID), path(reads) + val sample_size output: tuple val(sampleID), path("*.fastq.gz"), emit: reads diff --git a/workflows/escherichia_coli.nf b/workflows/escherichia_coli.nf index a6a58f11..84f34e41 100644 --- a/workflows/escherichia_coli.nf +++ b/workflows/escherichia_coli.nf @@ -45,11 +45,13 @@ workflow CALL_ESCHERICHIA_COLI { serotypefinderDb = file(params.serotypefinderDb, checkIfExists: true) shigapassDb = file(params.shigapassDb, checkIfExists: true) virulencefinderDb = file(params.virulencefinderDb, checkIfExists: true) + // schemas and values + targetSampleSize = params.targetSampleSize ? params.targetSampleSize : Channel.of([]) main: ch_versions = Channel.empty() - CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples ) + CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize ) CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly} CALL_BACTERIAL_BASE.out.reads.set{ch_reads} diff --git a/workflows/klebsiella_pneumoniae.nf b/workflows/klebsiella_pneumoniae.nf index ce5e357b..00ed38bc 100644 --- a/workflows/klebsiella_pneumoniae.nf +++ b/workflows/klebsiella_pneumoniae.nf @@ -43,11 +43,13 @@ workflow CALL_KLEBSIELLA_PNEUMONIAE { pointfinderDb = file(params.pointfinderDb, checkIfExists: true) serotypefinderDb = file(params.serotypefinderDb, checkIfExists: true) virulencefinderDb = file(params.virulencefinderDb, checkIfExists: true) + // schemas and values + targetSampleSize = params.targetSampleSize ? 
params.targetSampleSize : Channel.of([]) main: ch_versions = Channel.empty() - CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples ) + CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize ) CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly} CALL_BACTERIAL_BASE.out.reads.set{ch_reads} diff --git a/workflows/mycobacterium_tuberculosis.nf b/workflows/mycobacterium_tuberculosis.nf index f9c934e6..df48d4a0 100644 --- a/workflows/mycobacterium_tuberculosis.nf +++ b/workflows/mycobacterium_tuberculosis.nf @@ -33,11 +33,13 @@ workflow CALL_MYCOBACTERIUM_TUBERCULOSIS { tbdbBed = file(params.tbdbBed, checkIfExists: true) tbdbBedIdx = file(params.tbdbBedIdx, checkIfExists: true) tbGradingRulesBed = file(params.tbGradingRulesBed, checkIfExists: true) + // schemas and values + targetSampleSize = params.targetSampleSize ? params.targetSampleSize : Channel.of([]) main: ch_versions = Channel.empty() - CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples ) + CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize ) CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly} CALL_BACTERIAL_BASE.out.reads.set{ch_reads} diff --git a/workflows/staphylococcus_aureus.nf b/workflows/staphylococcus_aureus.nf index fd1a6fba..562d6197 100644 --- a/workflows/staphylococcus_aureus.nf +++ b/workflows/staphylococcus_aureus.nf @@ -41,11 +41,13 @@ workflow CALL_STAPHYLOCOCCUS_AUREUS { resfinderDb = file(params.resfinderDb, checkIfExists: true) pointfinderDb = file(params.pointfinderDb, checkIfExists: true) virulencefinderDb = file(params.virulencefinderDb, checkIfExists: true) + // schemas and values + targetSampleSize = params.targetSampleSize ? 
params.targetSampleSize : Channel.of([]) main: ch_versions = Channel.empty() - CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples ) + CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize ) CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly} CALL_BACTERIAL_BASE.out.reads.set{ch_reads} diff --git a/workflows/streptococcus.nf b/workflows/streptococcus.nf index 031f62bb..4e258cdb 100644 --- a/workflows/streptococcus.nf +++ b/workflows/streptococcus.nf @@ -53,11 +53,12 @@ workflow CALL_STREPTOCOCCUS { mlstScheme = params.mlstScheme ? params.mlstScheme : Channel.of([]) species = params.species ? params.species : Channel.of([]) speciesDir = params.speciesDir ? params.speciesDir : Channel.of([]) + targetSampleSize = params.targetSampleSize ? params.targetSampleSize : Channel.of([]) main: ch_versions = Channel.empty() - CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples ) + CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize ) CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly} CALL_BACTERIAL_BASE.out.reads.set{ch_reads} diff --git a/workflows/streptococcus_pyogenes.nf b/workflows/streptococcus_pyogenes.nf index 35ccb290..e9af0367 100644 --- a/workflows/streptococcus_pyogenes.nf +++ b/workflows/streptococcus_pyogenes.nf @@ -47,11 +47,12 @@ workflow CALL_STREPTOCOCCUS_PYOGENES { mlstScheme = params.mlstScheme ? params.mlstScheme : Channel.of([]) species = params.species ? params.species : Channel.of([]) speciesDir = params.speciesDir ? params.speciesDir : Channel.of([]) + targetSampleSize = params.targetSampleSize ? 
params.targetSampleSize : Channel.of([]) main: ch_versions = Channel.empty() - CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples ) + CALL_BACTERIAL_BASE( coreLociBed, referenceGenome, referenceGenomeDir, inputSamples, targetSampleSize ) CALL_BACTERIAL_BASE.out.assembly.set{ch_assembly} CALL_BACTERIAL_BASE.out.reads.set{ch_reads} From e274208db4586c14be3d1b018984d18f97a3fc0a Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 12 Nov 2024 16:56:14 +0100 Subject: [PATCH 3/4] Add symlinkDir to dev config spyogenes --- configs/nextflow.dev.config | 1 + 1 file changed, 1 insertion(+) diff --git a/configs/nextflow.dev.config b/configs/nextflow.dev.config index d8154a68..08c8fa79 100644 --- a/configs/nextflow.dev.config +++ b/configs/nextflow.dev.config @@ -111,6 +111,7 @@ profiles { params.species = 'streptococcus pyogenes' params.speciesDir = 'spyogenes' params.mlstScheme = 'spyogenes' + params.symlinkDir = "/access/jasen/spyogenes/" params.referenceGenome = "${params.root}/assets/genomes/streptococcus_pyogenes/GCF_000006785.2.fasta" params.referenceGenomeIdx = "${params.root}/assets/genomes/streptococcus_pyogenes/GCF_000006785.2.fasta.fai" params.referenceGenomeGff = "${params.root}/assets/genomes/streptococcus_pyogenes/GCF_000006785.2.gff" From e3cab12697e97cf69808ea15367ebf515e277b71 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 12 Nov 2024 16:57:05 +0100 Subject: [PATCH 4/4] Update CHANGELOG.md re .first() bug fix --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51149023..eb08ca31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed resfinder `--species` arg - Fixed `nextflow.hopper.config` `symlinkDir` - Removed serotypefinder from saureus workflow +- Fixed jasen processing only the first row/sample of the input csv (removed `.first()` from the read-downsampling step) ### Changed