From 21d6cfc6f08340441b32ea4d93d13cb426bb2deb Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 27 Mar 2024 16:54:53 +0100 Subject: [PATCH 01/45] fix refactor references --- conf/modules/prepare_references.config | 55 +------------- conf/test.config | 4 +- subworkflows/local/prepare_references.nf | 96 +++++++++--------------- workflows/tomte.nf | 63 +++++++++------- 4 files changed, 75 insertions(+), 143 deletions(-) diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 5f2932cd..f4d7d489 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -16,17 +16,6 @@ process { - withName: '.*PREPARE_REFERENCES:GUNZIP_FASTA' { - ext.when = {params.fasta.endsWith(".gz")} - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference, - pattern: "*.{fasta,fa}" - ] - } - withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' { ext.when = { !params.fai } ext.args2 = { '--no-same-owner' } @@ -68,16 +57,6 @@ process { ] } - withName: '.*PREPARE_REFERENCES:UNTAR_STAR_INDEX' { - ext.when = { params.star_index && params.star_index.endsWith(".gz") } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - withName: '.*PREPARE_REFERENCES:BUILD_STAR_GENOME' { ext.when = { !params.star_index } publishDir = [ @@ -88,17 +67,6 @@ process { ] } - withName: '.*PREPARE_REFERENCES:GUNZIP_GTF' { - ext.when = { params.gtf && params.gtf.endsWith(".gz") } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference, - pattern: "*gtf" - ] - } - withName: '.*PREPARE_REFERENCES:GTF_TO_REFFLAT' { ext.args = { [ '-genePredExt', @@ -123,23 +91,12 @@ process { } withName: '.*PREPARE_REFERENCES:UNTAR_VEP_CACHE' { - ext.when = { (params.vep_cache && params.vep_cache.endsWith("tar.gz")) } + ext.when = { (params.vep_cache.endsWith("tar.gz")) } publishDir = [ enabled: false ] } - withName: '.*PREPARE_REFERENCES:GUNZIP_TRFASTA' { - ext.when = { params.transcript_fasta && params.transcript_fasta.endsWith(".gz") } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference, - pattern: "*.{fasta,fa}" - ] - } - withName: '.*PREPARE_REFERENCES:GFFREAD' { ext.when = { !params.transcript_fasta } ext.args = { '-w' } @@ -161,14 +118,4 @@ process { ] } - withName: '.*PREPARE_REFERENCES:UNTAR_SALMON_INDEX' { - ext.when = { params.salmon_index.endsWith(".gz") } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference - ] - } - } diff --git a/conf/test.config b/conf/test.config index eeb61a26..c9cc7be8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,8 +25,8 @@ params { // Genome references genome = 'GRCh37' - fasta = "${projectDir}/test_data/grch37_chr21.fa.gz" - gtf = "${projectDir}/test_data/grch37_chr21.gtf.gz" + fasta = "${projectDir}/test_data/grch37_chr21.fa" + gtf = "${projectDir}/test_data/grch37_chr21.gtf" // Other parameters star_two_pass_mode = 'None' diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 09e2ace0..121720b8 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -8,86 +8,61 @@ include { GET_RRNA_TRANSCRIPTS } from '../../modules/loc include { GTFTOGENEPRED_REFFLAT as GTF_TO_REFFLAT } from '../../modules/local/gtftorefflat' include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes' include { GFFREAD } from '../../modules/local/gffread' -include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_GTF } from '../../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_TRFASTA } from '../../modules/nf-core/gunzip/main' include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_GENOME } from '../../modules/nf-core/samtools/faidx/main' include { STAR_GENOMEGENERATE as BUILD_STAR_GENOME } from '../../modules/nf-core/star/genomegenerate/main' -include { UNTAR as UNTAR_STAR_INDEX } from '../../modules/nf-core/untar/main' -include { UNTAR as UNTAR_SALMON_INDEX } from '../../modules/nf-core/untar/main' include { UNTAR as UNTAR_VEP_CACHE } from '../../modules/nf-core/untar/main' include { SALMON_INDEX as SALMON_INDEX } from '../../modules/nf-core/salmon/index/main' workflow PREPARE_REFERENCES { take: - fasta // parameter: [mandatory] path to fasta - fai // channel: [optional] [ val(meta), path(fai) ] - star_index // parameter: [optional] path to star_index - gtf 
// parameter: [mandatory] path to gtf - ch_vep_cache // channel: [optional] [ path(vep_cache) ] - transcript_fasta // parameter: [optional] path to transcript_fasta - salmon_index // parameter: [optional] path to salmon_index + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_fai_input // channel: [optional] [ val(meta), path(fai) ] + ch_star_index_input // channel: [optional] [ val(meta), path(star_index) ] + ch_gtf // channel: [mandatory] [ val(meta), path(gtf) ] + ch_vep_cache_input // channel: [optional] [ path(vep_cache) ] + ch_transcript_fasta_input // channel: [optional] [ path(transcript_fasta) ] + ch_salmon_index_input // channel: [optional] [ path(salmon_index) ] + ch_sequence_dict_input // channel: [optional] [ val(meta), path(dict) ] main: ch_versions = Channel.empty() - fasta_meta = Channel.fromPath(fasta).map { it -> [ [id:it[0] ], it] }.collect() - GUNZIP_FASTA(fasta_meta) - ch_fasta = fasta.endsWith(".gz") ? GUNZIP_FASTA.out.gunzip.collect() : fasta_meta - - // If no genome indices, create it SAMTOOLS_FAIDX_GENOME(ch_fasta,[[],[]]) - ch_fai = fai.mix(SAMTOOLS_FAIDX_GENOME.out.fai).collect() + ch_fai = ch_fai_input.mix(SAMTOOLS_FAIDX_GENOME.out.fai).collect() + // If no dictionary, create it BUILD_DICT(ch_fasta) - ch_dict = BUILD_DICT.out.dict.collect() - - gtf_meta = Channel.fromPath(gtf).map{ it -> [ [id:it[0]], it ] }.collect() - GUNZIP_GTF(gtf_meta) - ch_gtf = gtf.endsWith(".gz") ? GUNZIP_GTF.out.gunzip.collect() : gtf_meta.collect() + ch_dict = ch_sequence_dict_input.mix(BUILD_DICT.out.dict).collect() // Get chrom sizes GET_CHROM_SIZES( ch_fai ) - ch_fasta_no_meta = ch_fasta.map{ meta, fasta -> [ fasta ] } + // If no star index, create it + BUILD_STAR_GENOME ( ch_fasta, ch_gtf ) + ch_star_index = ch_star_index_input.mix(BUILD_STAR_GENOME.out.index).collect() - ch_star = star_index ? 
- Channel.fromPath(star_index).collect().map { it -> [ [id:it[0].simpleName], it ] } - : Channel.empty() - - BUILD_STAR_GENOME (ch_fasta, ch_gtf ) - UNTAR_STAR_INDEX( ch_star.map { it -> [[:], it[1]] } ) - ch_star_index = (!star_index) ? BUILD_STAR_GENOME.out.index.collect() : - (star_index.endsWith(".gz") ? UNTAR_STAR_INDEX.out.untar.map { it[1] } : ch_star) // Convert gtf to refflat for picard GTF_TO_REFFLAT(ch_gtf) // Get rRNA transcripts and convert to interval_list format GET_RRNA_TRANSCRIPTS(ch_gtf) - BEDTOINTERVALLIST( GET_RRNA_TRANSCRIPTS.out.bed.map { it -> [ [id:it.name], it ] }, ch_dict ) ch_interval = BEDTOINTERVALLIST.out.interval_list.map{ meta, interv -> [interv] }.collect() - UNTAR_VEP_CACHE (ch_vep_cache) - ch_untar_vep = UNTAR_VEP_CACHE.out.untar.map{ meta, files -> [files] }.collect() - // Preparing transcript fasta ch_fasta_fai = ch_fasta.mix(ch_fai.map{meta, fai -> fai}).collect() - GFFREAD(ch_gtf, ch_fasta_fai) - transcript_fasta_no_meta = (!transcript_fasta) ? GFFREAD.out.tr_fasta.collect() : - (transcript_fasta.endsWith(".gz") ? GUNZIP_TRFASTA.out.gunzip.collect().map{ meta, fasta -> [ fasta ] } : transcript_fasta) - - // Setting up Salmon index - ch_salmon = salmon_index ? Channel.fromPath(salmon_index).collect() : Channel.empty() - UNTAR_SALMON_INDEX( ch_salmon.map { it -> [[:], it] } ) - SALMON_INDEX(ch_fasta_no_meta, transcript_fasta_no_meta) + ch_transcript_fasta = ch_transcript_fasta_input.mix(GFFREAD.out.tr_fasta.collect()).collect() - ch_salmon_index = (!salmon_index) ? SALMON_INDEX.out.index.collect() : - (salmon_index.endsWith(".gz") ? 
UNTAR_SALMON_INDEX.out.untar.map { it[1] } : salmon_index.collect()) + // If no Salmon index, create it + SALMON_INDEX(ch_fasta.map{ meta, fasta -> [ fasta ] }, ch_transcript_fasta) + ch_salmon_index = ch_salmon_index_input.mix(SALMON_INDEX.out.index).collect() - ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) + // Untar vep chache is necesary + UNTAR_VEP_CACHE (ch_vep_cache_input.map { it -> [[id:'vep_cache'], it] }) + ch_untar_vep = UNTAR_VEP_CACHE.out.untar.map{ meta, files -> [files] }.collect() + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_GENOME.out.versions) ch_versions = ch_versions.mix(BUILD_DICT.out.versions) ch_versions = ch_versions.mix(GET_CHROM_SIZES.out.versions) @@ -96,21 +71,20 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(GET_RRNA_TRANSCRIPTS.out.versions) ch_versions = ch_versions.mix(BEDTOINTERVALLIST.out.versions) ch_versions = ch_versions.mix(GFFREAD.out.versions) - ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) + ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) emit: - chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] - fasta = ch_fasta // channel: [ val(meta), path(fasta) ] - fai = ch_fai // channel: [ val(meta), path(fai) ] - fasta_fai = ch_fasta_fai // channel: [ val(meta), path(fasta), path(fai) ] - sequence_dict = BUILD_DICT.out.dict.collect() // channel: [ val(meta), path(dict) ] - gtf = ch_gtf // channel: [ val(meta), path(gtf) ] - star_index = ch_star_index // channel: [ val(meta), path(star_index) ] - salmon_index = ch_salmon_index // channel: [ path(salmon_index) ] - refflat = GTF_TO_REFFLAT.out.refflat.collect() // channel: [ path(refflat) ] - rrna_bed = GET_RRNA_TRANSCRIPTS.out.bed.collect() // channel: [ path(bed) ] - interval_list = ch_interval // channel: [ path(interval) ] - vep_resources = ch_untar_vep // channel: [ path(cache) ] - versions = ch_versions // channel: [ path(versions.yml) ] + 
chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] + fasta = ch_fasta // channel: [ val(meta), path(fasta) ] + fai = ch_fai // channel: [ val(meta), path(fai) ] + fasta_fai = ch_fasta_fai // channel: [ val(meta), path(fasta), path(fai) ] + sequence_dict = ch_dict // channel: [ val(meta), path(dict) ] + star_index = ch_star_index // channel: [ val(meta), path(star_index) ] + salmon_index = ch_salmon_index // channel: [ path(salmon_index) ] + refflat = GTF_TO_REFFLAT.out.refflat.collect() // channel: [ path(refflat) ] + rrna_bed = GET_RRNA_TRANSCRIPTS.out.bed.collect() // channel: [ path(bed) ] + interval_list = ch_interval // channel: [ path(interval) ] + vep_resources = ch_untar_vep // channel: [ path(cache) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/tomte.nf b/workflows/tomte.nf index a877939c..bdfa1f66 100644 --- a/workflows/tomte.nf +++ b/workflows/tomte.nf @@ -50,26 +50,41 @@ workflow TOMTE { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + // Mandatory ch_samples = ch_samplesheet.map { meta, fastqs -> meta } ch_case_info = ch_samples.toList().map { create_case_channel(it) } + ch_fasta = Channel.fromPath(params.fasta).map {it -> [[id:it[0].simpleName], it]}.collect() + ch_gtf = Channel.fromPath(params.gtf).map {it -> [[id:it[0].simpleName], it]}.collect() + ch_platform = Channel.from(params.platform).collect() - ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() - : Channel.value([[],[]]) - ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect() - : Channel.value([]) - fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + // Optional + ch_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_gene_panel_clinical_filter = params.gene_panel_clinical_filter ? 
Channel.fromPath(params.gene_panel_clinical_filter).collect() + : Channel.empty() + ch_ref_drop_annot_file = params.reference_drop_annot_file ? Channel.fromPath(params.reference_drop_annot_file).collect() : Channel.empty() ch_ref_drop_count_file = params.reference_drop_count_file ? Channel.fromPath(params.reference_drop_count_file).collect() : Channel.empty() ch_ref_drop_splice_folder = params.reference_drop_splice_folder ? Channel.fromPath(params.reference_drop_splice_folder).collect() : Channel.empty() - ch_ref_drop_annot_file = params.reference_drop_annot_file ? Channel.fromPath(params.reference_drop_annot_file).collect() + ch_salmon_index = params.salmon_index ? Channel.fromPath(params.salmon_index) : Channel.empty() - ch_gene_panel_clinical_filter = params.gene_panel_clinical_filter ? Channel.fromPath(params.gene_panel_clinical_filter).collect() + ch_star_index = params.star_index ? Channel.fromPath(params.star_index).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_transcript_fasta = params.transcript_fasta ? Channel.fromPath(params.transcript_fasta).colelct() + : Channel.empty() + ch_sequence_dict = params.sequence_dict ? Channel.fromPath(params.sequence_dict).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_subsample_bed = params.subsample_bed ? Channel.fromPath(params.subsample_bed).collect() : Channel.empty() + ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).collect() + : Channel.value([[],[]]) ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect() : Channel.value([]) - ch_platform = Channel.from(params.platform).collect() + ch_vep_filters = params.vep_filters ? 
Channel.fromPath(params.vep_filters).collect() + : Channel.value([]) + // Read and store paths in the vep_plugin_files file ch_vep_extra_files_unsplit.splitCsv ( header:true ) @@ -85,23 +100,19 @@ workflow TOMTE { .set {ch_vep_extra_files} PREPARE_REFERENCES( - params.fasta, - fai, - params.star_index, - params.gtf, + ch_fasta, + ch_fai, + ch_star_index, + ch_gtf, ch_vep_cache_unprocessed, - params.transcript_fasta, - params.salmon_index + ch_transcript_fasta, + ch_salmon_index, + ch_sequence_dict ).set { ch_references } // Gather built indices or get them from the params - ch_chrom_sizes = ch_references.chrom_sizes - ch_sequence_dict = params.sequence_dict ? Channel.fromPath(params.sequence_dict).map{ it -> [[:], it] }.collect() - : ( ch_references.sequence_dict ?: Channel.empty() ) - ch_subsample_bed = params.subsample_bed ? Channel.fromPath(params.subsample_bed).collect() - : Channel.empty() - ch_vep_cache = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") ) ? ch_references.vep_resources - : ( params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) ) + ch_vep_cache = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") ) ? 
ch_references.vep_resources + : ch_vep_cache_unprocessed FASTQC ( ch_samplesheet @@ -112,7 +123,7 @@ workflow TOMTE { ALIGNMENT( ch_samplesheet, ch_references.star_index, - ch_references.gtf, + ch_gtf, ch_platform, ch_subsample_bed, params.seed_frac, @@ -135,7 +146,7 @@ workflow TOMTE { ANALYSE_TRANSCRIPTS( ch_alignment.bam_bai, ch_alignment.bam_ds_bai, - ch_references.gtf, + ch_gtf, ch_references.fasta_fai, ch_alignment.gene_counts, ch_ref_drop_count_file, @@ -156,7 +167,7 @@ workflow TOMTE { ch_alignment.bam_bai, ch_references.fasta, ch_references.fai, - ch_sequence_dict, + ch_references.sequence_dict, params.variant_caller ) ch_versions = ch_versions.mix(CALL_VARIANTS.out.versions) @@ -166,7 +177,7 @@ workflow TOMTE { ch_alignment.bam_bai, ch_references.fasta, ch_references.fai, - ch_sequence_dict, + ch_references.sequence_dict, ch_references.interval_list, ch_case_info ) @@ -184,7 +195,7 @@ workflow TOMTE { IGV_TRACKS( ch_alignment.star_wig, - ch_chrom_sizes, + ch_references.chrom_sizes, ch_alignment.spl_junc ) ch_versions = ch_versions.mix(IGV_TRACKS.out.versions) From 78946298a33dd6e685f424cfdc8ff34bd42a5fdb Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 2 Apr 2024 18:10:12 +0200 Subject: [PATCH 02/45] feat add args to AE and AS --- modules/local/drop_config_runAE.nf | 3 ++- modules/local/drop_config_runAS.nf | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/local/drop_config_runAE.nf b/modules/local/drop_config_runAE.nf index a37bb2b1..7b3d04db 100644 --- a/modules/local/drop_config_runAE.nf +++ b/modules/local/drop_config_runAE.nf @@ -32,6 +32,7 @@ process DROP_CONFIG_RUN_AE { task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: '' def genome_assembly = "${genome}".contains("h37") ? "hg19" : "${genome}" def drop_group = "${drop_group_samples_ae}".replace(" ","") def zscorecutoff = drop_zScoreCutoff ? 
"--zscorecutoff ${drop_zScoreCutoff}" : '' @@ -52,7 +53,7 @@ process DROP_CONFIG_RUN_AE { $zscorecutoff \\ --output config.yaml - snakemake aberrantExpression --cores ${task.cpus} --rerun-triggers mtime + snakemake aberrantExpression --cores ${task.cpus} --rerun-triggers mtime $args cp output/processed_results/aberrant_expression/*/outrider/outrider/OUTRIDER_results_all.Rds . cp output/processed_data/preprocess/*/gene_name_mapping_*.tsv . diff --git a/modules/local/drop_config_runAS.nf b/modules/local/drop_config_runAS.nf index b6a85862..9c8a650d 100644 --- a/modules/local/drop_config_runAS.nf +++ b/modules/local/drop_config_runAS.nf @@ -31,6 +31,7 @@ process DROP_CONFIG_RUN_AS { task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: '' def genome_assembly = "${genome}".contains("h37") ? "hg19" : "${genome}" def drop_group = "${drop_group_samples_as}".replace(" ","") """ @@ -49,7 +50,7 @@ process DROP_CONFIG_RUN_AS { --padjcutoff ${drop_padjcutoff_as} \\ --output config.yaml - snakemake aberrantSplicing --cores ${task.cpus} --rerun-triggers mtime + snakemake aberrantSplicing --cores ${task.cpus} --rerun-triggers mtime $args cp output/html/AberrantSplicing/FRASER_results_fraser--*.tsv . cp output/processed_data/preprocess/*/gene_name_mapping_*.tsv . 
From 6ded005936e5243e6c2f1a5c4354758bd3abac7d Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 2 Apr 2024 18:15:03 +0200 Subject: [PATCH 03/45] feat add args to AE and AS --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08d48e46..a84d157f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added automatic tests to test the pipeline with all switches set to false [#100](https://github.com/genomic-medicine-sweden/tomte/pull/100) - Added better documentation on subworkflow input [#101](https://github.com/genomic-medicine-sweden/tomte/pull/101) +- Added option to add extra arguments to DROP aberrant expression and aberrant splicing [#104](https://github.com/genomic-medicine-sweden/tomte/pull/104) ### `Fixed` From 780befbe785cc80f6bd54eb5914ae4ef4c4fe270 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 2 Apr 2024 18:17:28 +0200 Subject: [PATCH 04/45] fix trailing space --- modules/local/drop_config_runAE.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/drop_config_runAE.nf b/modules/local/drop_config_runAE.nf index 7b3d04db..16eb7c41 100644 --- a/modules/local/drop_config_runAE.nf +++ b/modules/local/drop_config_runAE.nf @@ -32,7 +32,7 @@ process DROP_CONFIG_RUN_AE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def genome_assembly = "${genome}".contains("h37") ? "hg19" : "${genome}" def drop_group = "${drop_group_samples_ae}".replace(" ","") def zscorecutoff = drop_zScoreCutoff ? 
"--zscorecutoff ${drop_zScoreCutoff}" : '' From e5f3785bb10d1383df0efc34a3ef0d7e4067356d Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 3 Apr 2024 14:58:13 +0200 Subject: [PATCH 05/45] mergeing dev --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13fd0d2d..682bd318 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Subsample and downsample switches [#97](https://github.com/genomic-medicine-sweden/tomte/pull/97) - Now all reference files come with meta to avoid confusion [#101](https://github.com/genomic-medicine-sweden/tomte/pull/101) - GATK4_ASEREADCOUNTER and GATK4_SPLITNCIGARREADS have been updated [#101](https://github.com/genomic-medicine-sweden/tomte/pull/101) +- Prepare reference module has been reformated and simplified [#105](https://github.com/genomic-medicine-sweden/tomte/pull/105) ## 1.1.0 - Rudolph [2024-03-11] From 1b8c5017977450ae7bf1dae7459eb982c62d9727 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 3 Apr 2024 14:59:25 +0200 Subject: [PATCH 06/45] feat reformated references --- conf/modules/prepare_references.config | 53 ++++++++++++ conf/test.config | 4 +- subworkflows/local/prepare_references.nf | 106 +++++++++++++++++++---- workflows/tomte.nf | 6 +- 4 files changed, 149 insertions(+), 20 deletions(-) diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index f4d7d489..65cbb34b 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -16,6 +16,17 @@ process { + withName: '.*PREPARE_REFERENCES:GUNZIP_FASTA' { + ext.when = {params.fasta.endsWith(".gz")} + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference, + pattern: "*.{fasta,fa}" + ] + } + withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' { ext.when = { !params.fai } ext.args2 = { '--no-same-owner' } @@ -39,6 +50,17 @@ process { ] } + withName: '.*PREPARE_REFERENCES:GUNZIP_GTF' { + ext.when = { params.gtf.endsWith(".gz") } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference, + pattern: "*gtf" + ] + } + withName: '.*PREPARE_REFERENCES:GET_RRNA_TRANSCRIPTS' { publishDir = [ path: { "${params.outdir}/references" }, @@ -57,6 +79,16 @@ process { ] } + withName: '.*PREPARE_REFERENCES:UNTAR_STAR_INDEX' { + ext.when = { params.star_index && params.star_index.endsWith(".gz") } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + withName: '.*PREPARE_REFERENCES:BUILD_STAR_GENOME' { ext.when = { !params.star_index } publishDir = [ @@ -67,6 +99,17 @@ process { ] } + withName: '.*PREPARE_REFERENCES:GUNZIP_TRFASTA' { + ext.when = { params.transcript_fasta && params.transcript_fasta.endsWith(".gz") } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference, + pattern: "*.{fasta,fa}" + ] + } + withName: '.*PREPARE_REFERENCES:GTF_TO_REFFLAT' { ext.args = { [ '-genePredExt', @@ -108,6 +151,16 @@ process { ] } + withName: '.*PREPARE_REFERENCES:UNTAR_SALMON_INDEX' { + ext.when = { params.salmon_index.endsWith(".gz") } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + withName: '.*PREPARE_REFERENCES:SALMON_INDEX' { ext.when = { !params.salmon_index } publishDir = [ diff --git a/conf/test.config b/conf/test.config index c9cc7be8..eeb61a26 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,8 +25,8 @@ params { // Genome references genome = 'GRCh37' - fasta = "${projectDir}/test_data/grch37_chr21.fa" - gtf = "${projectDir}/test_data/grch37_chr21.gtf" + fasta = "${projectDir}/test_data/grch37_chr21.fa.gz" + gtf = "${projectDir}/test_data/grch37_chr21.gtf.gz" // Other parameters star_two_pass_mode = 'None' diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 121720b8..f1e165c2 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -2,15 +2,20 @@ // Prepare reference files // +include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main' include { GATK4_BEDTOINTERVALLIST as BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' include { GATK4_CREATESEQUENCEDICTIONARY as BUILD_DICT } from '../../modules/nf-core/gatk4/createsequencedictionary/main' +include { GUNZIP as GUNZIP_GTF } from '../../modules/nf-core/gunzip/main' include { GET_RRNA_TRANSCRIPTS } from '../../modules/local/get_rrna_transcripts' include { GTFTOGENEPRED_REFFLAT as GTF_TO_REFFLAT } from '../../modules/local/gtftorefflat' include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes' +include { GUNZIP as GUNZIP_TRFASTA } from '../../modules/nf-core/gunzip/main' include { GFFREAD } from '../../modules/local/gffread' include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_GENOME } from '../../modules/nf-core/samtools/faidx/main' +include { UNTAR as UNTAR_STAR_INDEX } from '../../modules/nf-core/untar/main' include { STAR_GENOMEGENERATE as BUILD_STAR_GENOME } from '../../modules/nf-core/star/genomegenerate/main' include { UNTAR as UNTAR_VEP_CACHE } from 
'../../modules/nf-core/untar/main' +include { UNTAR as UNTAR_SALMON_INDEX } from '../../modules/nf-core/untar/main' include { SALMON_INDEX as SALMON_INDEX } from '../../modules/nf-core/salmon/index/main' workflow PREPARE_REFERENCES { @@ -27,61 +32,132 @@ workflow PREPARE_REFERENCES { main: ch_versions = Channel.empty() + // Gunzip fasta if necessary + GUNZIP_FASTA(ch_fasta) + ch_fasta. + branch{ it -> + compressed: it[1].toUriString().endsWith(".gz") // If the file ends with .gz + return [it[0], it[1]] + uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file dowsn't ends with .gz + return [it[0], it[1]] + } + .set{ch_fasta_mix} + + ch_fasta_final = ch_fasta_mix.uncompressed.mix(GUNZIP_FASTA.out.gunzip.collect()) + // If no genome indices, create it - SAMTOOLS_FAIDX_GENOME(ch_fasta,[[],[]]) + SAMTOOLS_FAIDX_GENOME(ch_fasta_final,[[],[]]) ch_fai = ch_fai_input.mix(SAMTOOLS_FAIDX_GENOME.out.fai).collect() // If no dictionary, create it - BUILD_DICT(ch_fasta) + BUILD_DICT(ch_fasta_final) ch_dict = ch_sequence_dict_input.mix(BUILD_DICT.out.dict).collect() // Get chrom sizes GET_CHROM_SIZES( ch_fai ) + // Gunzip gtf if necessary + GUNZIP_GTF(ch_gtf) + ch_gtf. + branch{ it -> + compressed: it[1].toUriString().endsWith(".gz") // If the file ends with .gz + return [it[0], it[1]] + uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file dowsn't ends with .gz + return [it[0], it[1]] + } + .set{ch_gtf_mix} + + ch_gtf_final = ch_gtf_mix.uncompressed.mix(GUNZIP_GTF.out.gunzip.collect()) + // If no star index, create it - BUILD_STAR_GENOME ( ch_fasta, ch_gtf ) - ch_star_index = ch_star_index_input.mix(BUILD_STAR_GENOME.out.index).collect() + BUILD_STAR_GENOME ( ch_fasta_final, ch_gtf_final ) + // Untar star index if necessary + UNTAR_STAR_INDEX(ch_star_index_input) + ch_star_index_input. 
+ branch{ it -> + compressed: it[1].toUriString().endsWith(".gz") // If the file ends with .gz + return [it[0], it[1]] + uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file dowsn't ends with .gz + return [it[0], it[1]] + } + .set{ch_star_mix} + ch_star_mixed = ch_star_mix.uncompressed.mix(UNTAR_STAR_INDEX.out.untar.collect()) + ch_star_final = ch_star_mixed.mix(BUILD_STAR_GENOME.out.index.collect()) + + // Convert gtf to refflat for picard - GTF_TO_REFFLAT(ch_gtf) + GTF_TO_REFFLAT(ch_gtf_final) // Get rRNA transcripts and convert to interval_list format - GET_RRNA_TRANSCRIPTS(ch_gtf) + GET_RRNA_TRANSCRIPTS(ch_gtf_final) BEDTOINTERVALLIST( GET_RRNA_TRANSCRIPTS.out.bed.map { it -> [ [id:it.name], it ] }, ch_dict ) ch_interval = BEDTOINTERVALLIST.out.interval_list.map{ meta, interv -> [interv] }.collect() // Preparing transcript fasta - ch_fasta_fai = ch_fasta.mix(ch_fai.map{meta, fai -> fai}).collect() - GFFREAD(ch_gtf, ch_fasta_fai) - ch_transcript_fasta = ch_transcript_fasta_input.mix(GFFREAD.out.tr_fasta.collect()).collect() + ch_fasta_fai = ch_fasta_final.mix(ch_fai.map{meta, fai -> fai}).collect() + GFFREAD(ch_gtf_final, ch_fasta_fai) + + // Gunzip transcript fasta if necessary + GUNZIP_TRFASTA ( ch_transcript_fasta_input.map { it -> [[:], it] } ) + ch_transcript_fasta_input. 
+ branch{ it -> + compressed: it.toUriString().endsWith(".gz") // If the file ends with .gz + return it + uncompressed: !(it.toUriString().endsWith(".gz")) // If the file dowsn't ends with .gz + return it + } + .set{ch_transcript_fasta_mix} + ch_transcript_fasta_mix.compressed.view() + + ch_transcript_fasta_mixed = ch_transcript_fasta_mix.uncompressed.mix(GUNZIP_TRFASTA.out.gunzip.map{meta, index -> index}.collect()) + ch_transcript_fasta_final = ch_transcript_fasta_mixed.mix(GFFREAD.out.tr_fasta.collect()) + + // If no salmon index, create it + SALMON_INDEX(ch_fasta_final.map{ meta, fasta -> [ fasta ] }, ch_transcript_fasta_final) + // Untar salmon index if necessary + UNTAR_SALMON_INDEX( ch_salmon_index_input.map { it -> [[:], it] } ) + ch_salmon_index_input. + branch{ it -> + compressed: it.toUriString().endsWith(".gz") // If the file ends with .gz + return it + uncompressed: !(it.toUriString().endsWith(".gz")) // If the file dowsn't ends with .gz + return it + } + .set{ch_salmon_mix} - // If no Salmon index, create it - SALMON_INDEX(ch_fasta.map{ meta, fasta -> [ fasta ] }, ch_transcript_fasta) - ch_salmon_index = ch_salmon_index_input.mix(SALMON_INDEX.out.index).collect() + ch_salmon_mixed = ch_salmon_mix.uncompressed.mix(UNTAR_SALMON_INDEX.out.untar.map{meta, index -> index}.collect()) + ch_salmon_final = ch_salmon_mixed.mix(SALMON_INDEX.out.index.collect()) // Untar vep chache is necesary UNTAR_VEP_CACHE (ch_vep_cache_input.map { it -> [[id:'vep_cache'], it] }) ch_untar_vep = UNTAR_VEP_CACHE.out.untar.map{ meta, files -> [files] }.collect() + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_GENOME.out.versions) ch_versions = ch_versions.mix(BUILD_DICT.out.versions) ch_versions = ch_versions.mix(GET_CHROM_SIZES.out.versions) + ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) + ch_versions = ch_versions.mix(GUNZIP_TRFASTA.out.versions) + ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) 
ch_versions = ch_versions.mix(BUILD_STAR_GENOME.out.versions) ch_versions = ch_versions.mix(GTF_TO_REFFLAT.out.versions) ch_versions = ch_versions.mix(GET_RRNA_TRANSCRIPTS.out.versions) ch_versions = ch_versions.mix(BEDTOINTERVALLIST.out.versions) ch_versions = ch_versions.mix(GFFREAD.out.versions) + ch_versions = ch_versions.mix(UNTAR_SALMON_INDEX.out.versions) ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) emit: chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] - fasta = ch_fasta // channel: [ val(meta), path(fasta) ] + fasta = ch_fasta_final.collect() // channel: [ val(meta), path(fasta) ] fai = ch_fai // channel: [ val(meta), path(fai) ] fasta_fai = ch_fasta_fai // channel: [ val(meta), path(fasta), path(fai) ] + gtf = ch_gtf_final.collect() // channel: [ val(meta), path(gtf) ] sequence_dict = ch_dict // channel: [ val(meta), path(dict) ] - star_index = ch_star_index // channel: [ val(meta), path(star_index) ] - salmon_index = ch_salmon_index // channel: [ path(salmon_index) ] + star_index = ch_star_final.collect() // channel: [ val(meta), path(star_index) ] + salmon_index = ch_salmon_final.collect() // channel: [ path(salmon_index) ] refflat = GTF_TO_REFFLAT.out.refflat.collect() // channel: [ path(refflat) ] rrna_bed = GET_RRNA_TRANSCRIPTS.out.bed.collect() // channel: [ path(bed) ] interval_list = ch_interval // channel: [ path(interval) ] diff --git a/workflows/tomte.nf b/workflows/tomte.nf index bdfa1f66..7f59e855 100644 --- a/workflows/tomte.nf +++ b/workflows/tomte.nf @@ -72,7 +72,7 @@ workflow TOMTE { : Channel.empty() ch_star_index = params.star_index ? Channel.fromPath(params.star_index).map {it -> [[id:it[0].simpleName], it]}.collect() : Channel.empty() - ch_transcript_fasta = params.transcript_fasta ? Channel.fromPath(params.transcript_fasta).colelct() + ch_transcript_fasta = params.transcript_fasta ? 
Channel.fromPath(params.transcript_fasta) : Channel.empty() ch_sequence_dict = params.sequence_dict ? Channel.fromPath(params.sequence_dict).map{ it -> [[id:it[0].simpleName], it] }.collect() : Channel.empty() @@ -123,7 +123,7 @@ workflow TOMTE { ALIGNMENT( ch_samplesheet, ch_references.star_index, - ch_gtf, + ch_references.gtf, ch_platform, ch_subsample_bed, params.seed_frac, @@ -146,7 +146,7 @@ workflow TOMTE { ANALYSE_TRANSCRIPTS( ch_alignment.bam_bai, ch_alignment.bam_ds_bai, - ch_gtf, + ch_references.gtf, ch_references.fasta_fai, ch_alignment.gene_counts, ch_ref_drop_count_file, From 8184577c86bf9d5797c1959a425b96e1ca186681 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 3 Apr 2024 14:59:35 +0200 Subject: [PATCH 07/45] feat reformated references --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 682bd318..6332bc90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Subsample and downsample switches [#97](https://github.com/genomic-medicine-sweden/tomte/pull/97) - Now all reference files come with meta to avoid confusion [#101](https://github.com/genomic-medicine-sweden/tomte/pull/101) - GATK4_ASEREADCOUNTER and GATK4_SPLITNCIGARREADS have been updated [#101](https://github.com/genomic-medicine-sweden/tomte/pull/101) -- Prepare reference module has been reformated and simplified [#105](https://github.com/genomic-medicine-sweden/tomte/pull/105) +- Prepare reference subworkflow has been reformated and simplified [#105](https://github.com/genomic-medicine-sweden/tomte/pull/105) ## 1.1.0 - Rudolph [2024-03-11] From 9d1a4129d386d0878765cf4283c24669fa5143d5 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 3 Apr 2024 15:06:33 +0200 Subject: [PATCH 08/45] fix linting --- conf/modules/prepare_references.config | 2 +- subworkflows/local/prepare_references.nf | 5 
++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 65cbb34b..1e315494 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -16,7 +16,7 @@ process { - withName: '.*PREPARE_REFERENCES:GUNZIP_FASTA' { + withName: '.*PREPARE_REFERENCES:GUNZIP_FASTA' { ext.when = {params.fasta.endsWith(".gz")} publishDir = [ path: { "${params.outdir}/references" }, diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index f1e165c2..54b2324c 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -85,7 +85,6 @@ workflow PREPARE_REFERENCES { ch_star_mixed = ch_star_mix.uncompressed.mix(UNTAR_STAR_INDEX.out.untar.collect()) ch_star_final = ch_star_mixed.mix(BUILD_STAR_GENOME.out.index.collect()) - // Convert gtf to refflat for picard GTF_TO_REFFLAT(ch_gtf_final) @@ -132,14 +131,14 @@ workflow PREPARE_REFERENCES { // Untar vep chache is necesary UNTAR_VEP_CACHE (ch_vep_cache_input.map { it -> [[id:'vep_cache'], it] }) ch_untar_vep = UNTAR_VEP_CACHE.out.untar.map{ meta, files -> [files] }.collect() - + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_GENOME.out.versions) ch_versions = ch_versions.mix(BUILD_DICT.out.versions) ch_versions = ch_versions.mix(GET_CHROM_SIZES.out.versions) ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) ch_versions = ch_versions.mix(GUNZIP_TRFASTA.out.versions) - ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) + ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) ch_versions = ch_versions.mix(BUILD_STAR_GENOME.out.versions) ch_versions = ch_versions.mix(GTF_TO_REFFLAT.out.versions) ch_versions = ch_versions.mix(GET_RRNA_TRANSCRIPTS.out.versions) From 64af443387fcffe8680c5d3465ad84d9a7a959b8 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 3 Apr 2024 15:34:25 +0200 Subject: [PATCH 09/45] Correct typos --- subworkflows/local/prepare_references.nf | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 54b2324c..e6b33de6 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -38,7 +38,7 @@ workflow PREPARE_REFERENCES { branch{ it -> compressed: it[1].toUriString().endsWith(".gz") // If the file ends with .gz return [it[0], it[1]] - uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file dowsn't ends with .gz + uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file doesn't end with .gz return [it[0], it[1]] } .set{ch_fasta_mix} @@ -62,7 +62,7 @@ workflow PREPARE_REFERENCES { branch{ it -> compressed: it[1].toUriString().endsWith(".gz") // If the file ends with .gz return [it[0], it[1]] - uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file dowsn't ends with .gz + uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file doesn't end with .gz return [it[0], it[1]] } .set{ch_gtf_mix} @@ -77,7 +77,7 @@ workflow PREPARE_REFERENCES { branch{ it -> compressed: it[1].toUriString().endsWith(".gz") // If the file ends with .gz return [it[0], it[1]] - uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file dowsn't ends with .gz + uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file doesn't end with .gz return [it[0], it[1]] } .set{ch_star_mix} @@ -103,11 +103,10 @@ workflow PREPARE_REFERENCES { branch{ it -> compressed: it.toUriString().endsWith(".gz") // If the file ends with .gz return it - uncompressed: !(it.toUriString().endsWith(".gz")) // If the file dowsn't ends with .gz + uncompressed: !(it.toUriString().endsWith(".gz")) // If the file doesn't end with .gz return it } .set{ch_transcript_fasta_mix} - 
ch_transcript_fasta_mix.compressed.view() ch_transcript_fasta_mixed = ch_transcript_fasta_mix.uncompressed.mix(GUNZIP_TRFASTA.out.gunzip.map{meta, index -> index}.collect()) ch_transcript_fasta_final = ch_transcript_fasta_mixed.mix(GFFREAD.out.tr_fasta.collect()) @@ -120,7 +119,7 @@ workflow PREPARE_REFERENCES { branch{ it -> compressed: it.toUriString().endsWith(".gz") // If the file ends with .gz return it - uncompressed: !(it.toUriString().endsWith(".gz")) // If the file dowsn't ends with .gz + uncompressed: !(it.toUriString().endsWith(".gz")) // If the file doesn't end with .gz return it } .set{ch_salmon_mix} From efd342392444f8f13f49d16ddb187776ab290b33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Thu, 4 Apr 2024 14:25:11 +0200 Subject: [PATCH 10/45] Apply suggestions from code review Co-authored-by: Anders Jemt --- subworkflows/local/prepare_references.nf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index e6b33de6..b297709d 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -35,11 +35,11 @@ workflow PREPARE_REFERENCES { // Gunzip fasta if necessary GUNZIP_FASTA(ch_fasta) ch_fasta. - branch{ it -> - compressed: it[1].toUriString().endsWith(".gz") // If the file ends with .gz - return [it[0], it[1]] - uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return [it[0], it[1]] + branch{ meta, fasta -> + compressed: fasta.toUriString().endsWith(".gz") // If the file ends with .gz + return [ meta, fasta ] + uncompressed: !(fasta.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + return [ meta, fasta ] } .set{ch_fasta_mix} @@ -59,18 +59,18 @@ workflow PREPARE_REFERENCES { // Gunzip gtf if necessary GUNZIP_GTF(ch_gtf) ch_gtf. 
- branch{ it -> - compressed: it[1].toUriString().endsWith(".gz") // If the file ends with .gz - return [it[0], it[1]] - uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return [it[0], it[1]] + branch{ meta, gtf -> + compressed: meta.toUriString().endsWith(".gz") // If the file ends with .gz + return [ meta, gtf ] + uncompressed: !(gtf.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + return [ meta, gtf ] } .set{ch_gtf_mix} ch_gtf_final = ch_gtf_mix.uncompressed.mix(GUNZIP_GTF.out.gunzip.collect()) // If no star index, create it - BUILD_STAR_GENOME ( ch_fasta_final, ch_gtf_final ) + BUILD_STAR_GENOME( ch_fasta_final, ch_gtf_final ) // Untar star index if necessary UNTAR_STAR_INDEX(ch_star_index_input) ch_star_index_input. From 825b5ad1b06d1bcb1e66bfee4ab3d80ff7063cbc Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Thu, 4 Apr 2024 16:04:40 +0200 Subject: [PATCH 11/45] apply suggestions --- conf/test.config | 2 +- subworkflows/local/prepare_references.nf | 49 ++++++++++++++---------- workflows/tomte.nf | 10 ++--- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/conf/test.config b/conf/test.config index eeb61a26..f4320334 100644 --- a/conf/test.config +++ b/conf/test.config @@ -35,7 +35,7 @@ params { num_reads = 20000 // VEP - vep_cache = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz" + //vep_cache = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz" vep_filters = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt" vep_cache_version = 107 vep_plugin_files = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_files.csv" diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index b297709d..3d3086c8 100644 --- 
a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -60,7 +60,7 @@ workflow PREPARE_REFERENCES { GUNZIP_GTF(ch_gtf) ch_gtf. branch{ meta, gtf -> - compressed: meta.toUriString().endsWith(".gz") // If the file ends with .gz + compressed: gtf.toUriString().endsWith(".gz") // If the file ends with .gz return [ meta, gtf ] uncompressed: !(gtf.toUriString().endsWith(".gz")) // If the file doesn't end with .gz return [ meta, gtf ] @@ -74,11 +74,11 @@ workflow PREPARE_REFERENCES { // Untar star index if necessary UNTAR_STAR_INDEX(ch_star_index_input) ch_star_index_input. - branch{ it -> - compressed: it[1].toUriString().endsWith(".gz") // If the file ends with .gz - return [it[0], it[1]] - uncompressed: !(it[1].toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return [it[0], it[1]] + branch{ meta, star_index -> + compressed: star_index.toUriString().endsWith(".gz") // If the file ends with .gz + return [ meta, star_index ] + uncompressed: !(star_index.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + return [ meta, star_index ] } .set{ch_star_mix} @@ -94,17 +94,17 @@ workflow PREPARE_REFERENCES { ch_interval = BEDTOINTERVALLIST.out.interval_list.map{ meta, interv -> [interv] }.collect() // Preparing transcript fasta - ch_fasta_fai = ch_fasta_final.mix(ch_fai.map{meta, fai -> fai}).collect() + ch_fasta_fai = ch_fasta_final.join(ch_fai).collect() GFFREAD(ch_gtf_final, ch_fasta_fai) // Gunzip transcript fasta if necessary GUNZIP_TRFASTA ( ch_transcript_fasta_input.map { it -> [[:], it] } ) ch_transcript_fasta_input. 
- branch{ it -> - compressed: it.toUriString().endsWith(".gz") // If the file ends with .gz - return it - uncompressed: !(it.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return it + branch{ tr_fasta -> + compressed: tr_fasta.toUriString().endsWith(".gz") // If the file ends with .gz + return tr_fasta + uncompressed: !(tr_fasta.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + return tr_fasta } .set{ch_transcript_fasta_mix} @@ -116,11 +116,11 @@ workflow PREPARE_REFERENCES { // Untar salmon index if necessary UNTAR_SALMON_INDEX( ch_salmon_index_input.map { it -> [[:], it] } ) ch_salmon_index_input. - branch{ it -> - compressed: it.toUriString().endsWith(".gz") // If the file ends with .gz - return it - uncompressed: !(it.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return it + branch{ salmon_index -> + compressed: salmon_index.toUriString().endsWith(".gz") // If the file ends with .gz + return salmon_index + uncompressed: !(salmon_index.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + return salmon_index } .set{ch_salmon_mix} @@ -128,8 +128,17 @@ workflow PREPARE_REFERENCES { ch_salmon_final = ch_salmon_mixed.mix(SALMON_INDEX.out.index.collect()) // Untar vep chache is necesary - UNTAR_VEP_CACHE (ch_vep_cache_input.map { it -> [[id:'vep_cache'], it] }) - ch_untar_vep = UNTAR_VEP_CACHE.out.untar.map{ meta, files -> [files] }.collect() + UNTAR_VEP_CACHE (ch_vep_cache_input.map { vep_cache -> [[id:'vep_cache'], vep_cache] }) + ch_vep_cache_input. 
+ branch{ vep_cache -> + compressed: vep_cache.toUriString().endsWith(".gz") // If the file ends with .gz + return vep_cache + uncompressed: !(vep_cache.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + return vep_cache + } + .set{ch_vep_cache_mix} + + ch_final_vep = ch_vep_cache_mix.uncompressed.mix(UNTAR_VEP_CACHE.out.untar.map{meta, vep_cache -> vep_cache}.collect()) ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_GENOME.out.versions) @@ -159,6 +168,6 @@ workflow PREPARE_REFERENCES { refflat = GTF_TO_REFFLAT.out.refflat.collect() // channel: [ path(refflat) ] rrna_bed = GET_RRNA_TRANSCRIPTS.out.bed.collect() // channel: [ path(bed) ] interval_list = ch_interval // channel: [ path(interval) ] - vep_resources = ch_untar_vep // channel: [ path(cache) ] + vep_cache = ch_final_vep.collect() // channel: [ path(cache) ] versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/tomte.nf b/workflows/tomte.nf index 7f59e855..e5ebee68 100644 --- a/workflows/tomte.nf +++ b/workflows/tomte.nf @@ -78,8 +78,8 @@ workflow TOMTE { : Channel.empty() ch_subsample_bed = params.subsample_bed ? Channel.fromPath(params.subsample_bed).collect() : Channel.empty() - ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).collect() - : Channel.value([[],[]]) + ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache) + : Channel.empty() ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect() : Channel.value([]) ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect() @@ -110,10 +110,6 @@ workflow TOMTE { ch_sequence_dict ).set { ch_references } - // Gather built indices or get them from the params - ch_vep_cache = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") ) ? 
ch_references.vep_resources - : ch_vep_cache_unprocessed - FASTQC ( ch_samplesheet ) @@ -187,7 +183,7 @@ workflow TOMTE { ALLELE_SPECIFIC_CALLING.out.vcf, params.genome, params.vep_cache_version, - ch_vep_cache, + ch_references.vep_cache, ch_references.fasta, ch_vep_extra_files, ) From 53c771fdd0871aee94b63587791eea74ecd7b89e Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Fri, 5 Apr 2024 10:32:21 +0200 Subject: [PATCH 12/45] feat clarified subworkflow input output content --- subworkflows/local/annotate_snv.nf | 13 ++++++------- subworkflows/local/igv_tracks.nf | 12 ++++++------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/subworkflows/local/annotate_snv.nf b/subworkflows/local/annotate_snv.nf index 31a71438..5f2256d2 100644 --- a/subworkflows/local/annotate_snv.nf +++ b/subworkflows/local/annotate_snv.nf @@ -34,12 +34,11 @@ workflow ANNOTATE_SNV { ch_versions = ch_versions.mix(TABIX_VEP.out.versions.first()) emit: - json = ENSEMBLVEP_VEP.out.json - vcf_gz = ENSEMBLVEP_VEP.out.vcf - tbi_gz = TABIX_VEP.out.tbi - tab_gz = ENSEMBLVEP_VEP.out.tab - json_gz = ENSEMBLVEP_VEP.out.json - report = ENSEMBLVEP_VEP.out.report - versions = ch_versions // channel: [ path(versions.yml) ] + vcf_gz = ENSEMBLVEP_VEP.out.vcf // channel: [ val(meta), path(vcf.gz) ] + tbi_gz = TABIX_VEP.out.tbi // channel: [ val(meta), path(tbi) ] + tab_gz = ENSEMBLVEP_VEP.out.tab // channel: [ val(meta), path(tab.gz) ] + json_gz = ENSEMBLVEP_VEP.out.json // channel: [ val(meta), path(json.gz) ] + report = ENSEMBLVEP_VEP.out.report // channel: [ path(html) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/igv_tracks.nf b/subworkflows/local/igv_tracks.nf index f6b4d7b9..7aa4c683 100644 --- a/subworkflows/local/igv_tracks.nf +++ b/subworkflows/local/igv_tracks.nf @@ -8,9 +8,9 @@ include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main' workflow IGV_TRACKS { take: - wig - chrom_sizes - spl_junc + 
wig // channel: [mandatory] [ val(meta), path(wig) ] + chrom_sizes // channel: [mandatory] [ path(sizes) ] + spl_junc // channel: [mandatory] [ val(meta), path(tsv) ] main: ch_versions = Channel.empty() @@ -41,7 +41,7 @@ workflow IGV_TRACKS { emit: - bw = UCSC_WIGTOBIGWIG.out.bw - bed = TABIX_BGZIPTABIX.out.gz_tbi - versions = ch_versions + bw = UCSC_WIGTOBIGWIG.out.bw // channel: [ val(meta), path(bw) ] + bed = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), path(bed.gz), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] } From 334ec8a2e0d9cca7670019952c50f739797ebaf7 Mon Sep 17 00:00:00 2001 From: Felix Lenner <52530259+fellen31@users.noreply.github.com> Date: Fri, 5 Apr 2024 14:24:32 +0200 Subject: [PATCH 13/45] Update FastQC (#106) Update FastQC --- CHANGELOG.md | 1 + modules.json | 2 +- modules/nf-core/fastqc/main.nf | 6 ++++++ modules/nf-core/fastqc/tests/main.nf.test.snap | 2 +- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc530fc5..74d722b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - GATK4_ASEREADCOUNTER and GATK4_SPLITNCIGARREADS have been updated [#101](https://github.com/genomic-medicine-sweden/tomte/pull/101) - Updated GATK4_ASEREADCOUNTER, now bam and vcf will be given as one channel [#103](https://github.com/genomic-medicine-sweden/tomte/pull/103) - Prepare reference subworkflow has been reformated and simplified [#105](https://github.com/genomic-medicine-sweden/tomte/pull/105) +- FastQC have been updated to correctly allocate memory [#168](https://github.com/genomic-medicine-sweden/tomte/pull/168) ## 1.1.0 - Rudolph [2024-03-11] diff --git a/modules.json b/modules.json index ff4a6d5d..74eae697 100644 --- a/modules.json +++ b/modules.json @@ -48,7 +48,7 @@ }, "fastqc": { "branch": "master", - "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", + "git_sha": 
"285a50500f9e02578d90b3ce6382ea3c30216acd", "installed_by": ["modules"] }, "gatk4/asereadcounter": { diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 9e19a74c..d79f1c86 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -25,6 +25,11 @@ process FASTQC { def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name @@ -33,6 +38,7 @@ process FASTQC { fastqc \\ $args \\ --threads $task.cpus \\ + --memory $fastqc_memory \\ $renamed_files cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 10da7d9d..86f7c311 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -85,4 +85,4 @@ }, "timestamp": "2024-01-31T17:41:14.576531" } -} +} \ No newline at end of file From 3b3fce5d7702fa35f8692c54ffd91aee4e397104 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Mon, 8 Apr 2024 13:46:29 +0200 Subject: [PATCH 14/45] feat made a function to branch compressed and uncompressed --- conf/test.config | 2 +- subworkflows/local/prepare_references.nf | 140 ++++++++++++++--------- 2 files changed, 87 insertions(+), 55 deletions(-) diff --git a/conf/test.config b/conf/test.config index f4320334..eeb61a26 100644 --- a/conf/test.config +++ 
b/conf/test.config @@ -35,7 +35,7 @@ params { num_reads = 20000 // VEP - //vep_cache = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz" + vep_cache = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz" vep_filters = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt" vep_cache_version = 107 vep_plugin_files = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_files.csv" diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 3d3086c8..9fc302e4 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -31,17 +31,18 @@ workflow PREPARE_REFERENCES { main: ch_versions = Channel.empty() - + // Gunzip fasta if necessary GUNZIP_FASTA(ch_fasta) - ch_fasta. - branch{ meta, fasta -> - compressed: fasta.toUriString().endsWith(".gz") // If the file ends with .gz - return [ meta, fasta ] - uncompressed: !(fasta.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return [ meta, fasta ] - } - .set{ch_fasta_mix} + //ch_fasta. + // branch{ meta, fasta -> + // compressed: fasta.toUriString().endsWith(".gz") // If the file ends with .gz + // return [ meta, fasta ] + // uncompressed: !(fasta.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + // return [ meta, fasta ] + // } + // .set{ch_fasta_mix} + ch_fasta_mix = branchChannelToCompressedAndUncompressed(ch_fasta) ch_fasta_final = ch_fasta_mix.uncompressed.mix(GUNZIP_FASTA.out.gunzip.collect()) @@ -58,30 +59,32 @@ workflow PREPARE_REFERENCES { // Gunzip gtf if necessary GUNZIP_GTF(ch_gtf) - ch_gtf. 
- branch{ meta, gtf -> - compressed: gtf.toUriString().endsWith(".gz") // If the file ends with .gz - return [ meta, gtf ] - uncompressed: !(gtf.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return [ meta, gtf ] - } - .set{ch_gtf_mix} - + //ch_gtf. + // branch{ meta, gtf -> + // compressed: gtf.toUriString().endsWith(".gz") // If the file ends with .gz + // return [ meta, gtf ] + // uncompressed: !(gtf.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + // return [ meta, gtf ] + // } + // .set{ch_gtf_mix} + + ch_gtf_mix = branchChannelToCompressedAndUncompressed(ch_gtf) ch_gtf_final = ch_gtf_mix.uncompressed.mix(GUNZIP_GTF.out.gunzip.collect()) // If no star index, create it BUILD_STAR_GENOME( ch_fasta_final, ch_gtf_final ) // Untar star index if necessary UNTAR_STAR_INDEX(ch_star_index_input) - ch_star_index_input. - branch{ meta, star_index -> - compressed: star_index.toUriString().endsWith(".gz") // If the file ends with .gz - return [ meta, star_index ] - uncompressed: !(star_index.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return [ meta, star_index ] - } - .set{ch_star_mix} - + //ch_star_index_input. + // branch{ meta, star_index -> + // compressed: star_index.toUriString().endsWith(".gz") // If the file ends with .gz + // return [ meta, star_index ] + // uncompressed: !(star_index.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + // return [ meta, star_index ] + // } + // .set{ch_star_mix} + + ch_star_mix = branchChannelToCompressedAndUncompressed(ch_star_index_input) ch_star_mixed = ch_star_mix.uncompressed.mix(UNTAR_STAR_INDEX.out.untar.collect()) ch_star_final = ch_star_mixed.mix(BUILD_STAR_GENOME.out.index.collect()) @@ -99,15 +102,16 @@ workflow PREPARE_REFERENCES { // Gunzip transcript fasta if necessary GUNZIP_TRFASTA ( ch_transcript_fasta_input.map { it -> [[:], it] } ) - ch_transcript_fasta_input. 
- branch{ tr_fasta -> - compressed: tr_fasta.toUriString().endsWith(".gz") // If the file ends with .gz - return tr_fasta - uncompressed: !(tr_fasta.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return tr_fasta - } - .set{ch_transcript_fasta_mix} - + //ch_transcript_fasta_input. + // branch{ tr_fasta -> + // compressed: tr_fasta.toUriString().endsWith(".gz") // If the file ends with .gz + // return tr_fasta + // uncompressed: !(tr_fasta.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + // return tr_fasta + // } + // .set{ch_transcript_fasta_mix} + + ch_transcript_fasta_mix = branchChannelToCompressedAndUncompressed(ch_transcript_fasta_input) ch_transcript_fasta_mixed = ch_transcript_fasta_mix.uncompressed.mix(GUNZIP_TRFASTA.out.gunzip.map{meta, index -> index}.collect()) ch_transcript_fasta_final = ch_transcript_fasta_mixed.mix(GFFREAD.out.tr_fasta.collect()) @@ -115,29 +119,31 @@ workflow PREPARE_REFERENCES { SALMON_INDEX(ch_fasta_final.map{ meta, fasta -> [ fasta ] }, ch_transcript_fasta_final) // Untar salmon index if necessary UNTAR_SALMON_INDEX( ch_salmon_index_input.map { it -> [[:], it] } ) - ch_salmon_index_input. - branch{ salmon_index -> - compressed: salmon_index.toUriString().endsWith(".gz") // If the file ends with .gz - return salmon_index - uncompressed: !(salmon_index.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return salmon_index - } - .set{ch_salmon_mix} - + //ch_salmon_index_input. 
+ // branch{ salmon_index -> + // compressed: salmon_index.toUriString().endsWith(".gz") // If the file ends with .gz + // return salmon_index + // uncompressed: !(salmon_index.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + // return salmon_index + // } + // .set{ch_salmon_mix} + + ch_salmon_mix = branchChannelToCompressedAndUncompressed(ch_salmon_index_input) ch_salmon_mixed = ch_salmon_mix.uncompressed.mix(UNTAR_SALMON_INDEX.out.untar.map{meta, index -> index}.collect()) ch_salmon_final = ch_salmon_mixed.mix(SALMON_INDEX.out.index.collect()) // Untar vep chache is necesary UNTAR_VEP_CACHE (ch_vep_cache_input.map { vep_cache -> [[id:'vep_cache'], vep_cache] }) - ch_vep_cache_input. - branch{ vep_cache -> - compressed: vep_cache.toUriString().endsWith(".gz") // If the file ends with .gz - return vep_cache - uncompressed: !(vep_cache.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - return vep_cache - } - .set{ch_vep_cache_mix} - + //ch_vep_cache_input. + // branch{ vep_cache -> + // compressed: vep_cache.toUriString().endsWith(".gz") // If the file ends with .gz + // return vep_cache + // uncompressed: !(vep_cache.toUriString().endsWith(".gz")) // If the file doesn't end with .gz + // return vep_cache + // } + // .set{ch_vep_cache_mix} + + ch_vep_cache_mix = branchChannelToCompressedAndUncompressed(ch_vep_cache_input) ch_final_vep = ch_vep_cache_mix.uncompressed.mix(UNTAR_VEP_CACHE.out.untar.map{meta, vep_cache -> vep_cache}.collect()) ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) @@ -171,3 +177,29 @@ workflow PREPARE_REFERENCES { vep_cache = ch_final_vep.collect() // channel: [ path(cache) ] versions = ch_versions // channel: [ path(versions.yml) ] } +// Custom functions +/** +* Branch a channel into differnt channels, +* depending on whether the path is compressed or not. +* The resulting channels get meta only if the original one had it. 
+* +* @param Channel that may contain meta +* @return Channel branched on whether the file is compressed or uncompressed +*/ +def branchChannelToCompressedAndUncompressed(ch) { + if (ch.flatten().count().map{it == (1)}) { + return ch.branch { file -> + compressed: file.join("").endsWith(".gz") // If the file ends with .gz + return file + uncompressed: !(file.join("").endsWith(".gz")) // If the file doesn't end with .gz + return file + } + } else if (ch.flatten().count().map{it == (2)}) { + return ch.branch{ meta, file -> + compressed: file.join("").endsWith(".gz") // If the file ends with .gz + return [ meta, file ] + uncompressed: !(file.join("").endsWith(".gz")) // If the file doesn't end with .gz + return [ meta, file ] + } + } +} From 62196d1eef821277153d12c1131f1b590c62a087 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Mon, 8 Apr 2024 13:51:19 +0200 Subject: [PATCH 15/45] removed commented lines --- subworkflows/local/prepare_references.nf | 53 ------------------------ 1 file changed, 53 deletions(-) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 9fc302e4..6e62a8eb 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -34,14 +34,6 @@ workflow PREPARE_REFERENCES { // Gunzip fasta if necessary GUNZIP_FASTA(ch_fasta) - //ch_fasta. - // branch{ meta, fasta -> - // compressed: fasta.toUriString().endsWith(".gz") // If the file ends with .gz - // return [ meta, fasta ] - // uncompressed: !(fasta.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - // return [ meta, fasta ] - // } - // .set{ch_fasta_mix} ch_fasta_mix = branchChannelToCompressedAndUncompressed(ch_fasta) ch_fasta_final = ch_fasta_mix.uncompressed.mix(GUNZIP_FASTA.out.gunzip.collect()) @@ -59,15 +51,6 @@ workflow PREPARE_REFERENCES { // Gunzip gtf if necessary GUNZIP_GTF(ch_gtf) - //ch_gtf. 
- // branch{ meta, gtf -> - // compressed: gtf.toUriString().endsWith(".gz") // If the file ends with .gz - // return [ meta, gtf ] - // uncompressed: !(gtf.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - // return [ meta, gtf ] - // } - // .set{ch_gtf_mix} - ch_gtf_mix = branchChannelToCompressedAndUncompressed(ch_gtf) ch_gtf_final = ch_gtf_mix.uncompressed.mix(GUNZIP_GTF.out.gunzip.collect()) @@ -75,15 +58,6 @@ workflow PREPARE_REFERENCES { BUILD_STAR_GENOME( ch_fasta_final, ch_gtf_final ) // Untar star index if necessary UNTAR_STAR_INDEX(ch_star_index_input) - //ch_star_index_input. - // branch{ meta, star_index -> - // compressed: star_index.toUriString().endsWith(".gz") // If the file ends with .gz - // return [ meta, star_index ] - // uncompressed: !(star_index.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - // return [ meta, star_index ] - // } - // .set{ch_star_mix} - ch_star_mix = branchChannelToCompressedAndUncompressed(ch_star_index_input) ch_star_mixed = ch_star_mix.uncompressed.mix(UNTAR_STAR_INDEX.out.untar.collect()) ch_star_final = ch_star_mixed.mix(BUILD_STAR_GENOME.out.index.collect()) @@ -102,15 +76,6 @@ workflow PREPARE_REFERENCES { // Gunzip transcript fasta if necessary GUNZIP_TRFASTA ( ch_transcript_fasta_input.map { it -> [[:], it] } ) - //ch_transcript_fasta_input. 
- // branch{ tr_fasta -> - // compressed: tr_fasta.toUriString().endsWith(".gz") // If the file ends with .gz - // return tr_fasta - // uncompressed: !(tr_fasta.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - // return tr_fasta - // } - // .set{ch_transcript_fasta_mix} - ch_transcript_fasta_mix = branchChannelToCompressedAndUncompressed(ch_transcript_fasta_input) ch_transcript_fasta_mixed = ch_transcript_fasta_mix.uncompressed.mix(GUNZIP_TRFASTA.out.gunzip.map{meta, index -> index}.collect()) ch_transcript_fasta_final = ch_transcript_fasta_mixed.mix(GFFREAD.out.tr_fasta.collect()) @@ -119,30 +84,12 @@ workflow PREPARE_REFERENCES { SALMON_INDEX(ch_fasta_final.map{ meta, fasta -> [ fasta ] }, ch_transcript_fasta_final) // Untar salmon index if necessary UNTAR_SALMON_INDEX( ch_salmon_index_input.map { it -> [[:], it] } ) - //ch_salmon_index_input. - // branch{ salmon_index -> - // compressed: salmon_index.toUriString().endsWith(".gz") // If the file ends with .gz - // return salmon_index - // uncompressed: !(salmon_index.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - // return salmon_index - // } - // .set{ch_salmon_mix} - ch_salmon_mix = branchChannelToCompressedAndUncompressed(ch_salmon_index_input) ch_salmon_mixed = ch_salmon_mix.uncompressed.mix(UNTAR_SALMON_INDEX.out.untar.map{meta, index -> index}.collect()) ch_salmon_final = ch_salmon_mixed.mix(SALMON_INDEX.out.index.collect()) // Untar vep chache is necesary UNTAR_VEP_CACHE (ch_vep_cache_input.map { vep_cache -> [[id:'vep_cache'], vep_cache] }) - //ch_vep_cache_input. 
- // branch{ vep_cache -> - // compressed: vep_cache.toUriString().endsWith(".gz") // If the file ends with .gz - // return vep_cache - // uncompressed: !(vep_cache.toUriString().endsWith(".gz")) // If the file doesn't end with .gz - // return vep_cache - // } - // .set{ch_vep_cache_mix} - ch_vep_cache_mix = branchChannelToCompressedAndUncompressed(ch_vep_cache_input) ch_final_vep = ch_vep_cache_mix.uncompressed.mix(UNTAR_VEP_CACHE.out.untar.map{meta, vep_cache -> vep_cache}.collect()) From 0f3201ef520615c01e18ffdb9ee58511d63f3be0 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Mon, 8 Apr 2024 13:56:41 +0200 Subject: [PATCH 16/45] removed trailing space and added change to changelog --- CHANGELOG.md | 3 ++- subworkflows/local/prepare_references.nf | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74d722b0..796db969 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added automatic tests to test the pipeline with all switches set to false [#100](https://github.com/genomic-medicine-sweden/tomte/pull/100) - Added better documentation on subworkflow input [#101](https://github.com/genomic-medicine-sweden/tomte/pull/101) - Added option to add extra arguments to DROP aberrant expression and aberrant splicing [#104](https://github.com/genomic-medicine-sweden/tomte/pull/104) +- Added a function to branch references into compressed/uncompressed [#107](https://github.com/genomic-medicine-sweden/tomte/pull/107) ### `Fixed` @@ -16,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - GATK4_ASEREADCOUNTER and GATK4_SPLITNCIGARREADS have been updated [#101](https://github.com/genomic-medicine-sweden/tomte/pull/101) - Updated GATK4_ASEREADCOUNTER, now bam and vcf will be given as one channel [#103](https://github.com/genomic-medicine-sweden/tomte/pull/103) - Prepare reference 
subworkflow has been reformated and simplified [#105](https://github.com/genomic-medicine-sweden/tomte/pull/105) -- FastQC have been updated to correctly allocate memory [#168](https://github.com/genomic-medicine-sweden/tomte/pull/168) +- FastQC have been updated to correctly allocate memory [#106](https://github.com/genomic-medicine-sweden/tomte/pull/106) ## 1.1.0 - Rudolph [2024-03-11] diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 6e62a8eb..3b3082ee 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -31,7 +31,7 @@ workflow PREPARE_REFERENCES { main: ch_versions = Channel.empty() - + // Gunzip fasta if necessary GUNZIP_FASTA(ch_fasta) ch_fasta_mix = branchChannelToCompressedAndUncompressed(ch_fasta) From b0adc014296e9e2884a86d6e3ffc7e8a9ec500f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Mon, 8 Apr 2024 14:01:36 +0200 Subject: [PATCH 17/45] Update subworkflows/local/prepare_references.nf --- subworkflows/local/prepare_references.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 3b3082ee..0aefc842 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -126,7 +126,7 @@ workflow PREPARE_REFERENCES { } // Custom functions /** -* Branch a channel into differnt channels, +* Branch a channel into different channels, * depending on whether the path is compressed or not. * The resulting channels get meta only if the original one had it. 
* From 32d8d14f3b9e6c88051e8df04b55433869d1a409 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 10 Apr 2024 10:18:35 +0200 Subject: [PATCH 18/45] changed parameters from switch to skip --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 27 +++++++++ conf/modules/analyse_transcripts.config | 12 ++-- conf/modules/annotate_snv.config | 2 +- conf/modules/igv_tracks.config | 6 +- docs/usage.md | 60 +++++++++---------- nextflow.config | 14 ++--- nextflow_schema.json | 42 ++++++------- subworkflows/local/alignment.nf | 28 ++++----- subworkflows/local/analyse_transcripts.nf | 3 +- .../local/utils_nfcore_tomte_pipeline/main.nf | 22 +++---- workflows/tomte.nf | 5 +- 12 files changed, 126 insertions(+), 97 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aa23b66a..831943a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - "latest-everything" parameters: - "-profile test,docker" - - "-profile test,docker --switch_subsample_region false --switch_downsample false --switch_build_tracks false --switch_stringtie false --switch_vep false --switch_drop_ae false --switch_drop_as false" + - "-profile test,docker --skip_subsample_region true --skip_downsample true --skip_build_tracks true --skip_stringtie true --skip_vep true --skip_drop_ae true --skip_drop_as true" steps: - name: Check out pipeline code uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 diff --git a/CHANGELOG.md b/CHANGELOG.md index 796db969..a1cd7546 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +### Parameters + +| Old parameter | New parameter | +| ------------- | ------------- | + +:::note +Parameter has been updated if both old and new parameter information is present. 
+Parameter has been added if just the new parameter information is present. +Parameter has been removed if new parameter information isn't present. +::: + ### `Added` - Added automatic tests to test the pipeline with all switches set to false [#100](https://github.com/genomic-medicine-sweden/tomte/pull/100) @@ -19,6 +30,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Prepare reference subworkflow has been reformated and simplified [#105](https://github.com/genomic-medicine-sweden/tomte/pull/105) - FastQC have been updated to correctly allocate memory [#106](https://github.com/genomic-medicine-sweden/tomte/pull/106) +### `Parameters` + +- Updated parameter names to make their use easier and more clear, changing the names from `switch` to `skip` and their default value from `true` to `false` [#108](https://github.com/genomic-medicine-sweden/tomte/pull/108) + +| Old parameter | New parameter | +| --------------------------- | ------------------------- | +| `--switch_subsample_region` | `--skip_subsample_region` | +| `--switch_downsample` | `--skip_downsample` | +| `--switch_build_tracks` | `--skip_build_tracks` | +| `--switch_stringtie` | `--skip_stringtie` | +| `--switch_vep` | `--skip_vep` | +| `--switch_drop_ae` | `--skip_drop_ae` | +| `--switch_drop_as` | `--skip_drop_as` | + +:::note Parameter has been updated if both old and new parameter information is present. Parameter has been added if just the new parameter information is present. Parameter has been removed if new parameter information isn't present. ::: + ## 1.1.0 - Rudolph [2024-03-11] Release of genomic-medicine-sweden/tomte, created with the [nf-core](https://nf-co.re/) template. 
diff --git a/conf/modules/analyse_transcripts.config b/conf/modules/analyse_transcripts.config index c9ed8ed7..efeaff62 100644 --- a/conf/modules/analyse_transcripts.config +++ b/conf/modules/analyse_transcripts.config @@ -16,7 +16,7 @@ process { withName: '.*ANALYSE_TRANSCRIPTS:DROP_SAMPLE_ANNOT' { - ext.when = { params.switch_drop_ae | params.switch_drop_as } + ext.when = { (!params.skip_drop_ae) | (!params.skip_drop_as) } publishDir = [ path: { "${params.outdir}/analyse_transcripts/drop" }, mode: params.publish_dir_mode, @@ -25,7 +25,7 @@ process { } withName: '.*ANALYSE_TRANSCRIPTS:DROP_CONFIG_RUN_AE' { - ext.when = { params.switch_drop_ae } + ext.when = { (!params.skip_drop_ae) } publishDir = [ path: { "${params.outdir}/analyse_transcripts/drop/AE" }, mode: params.publish_dir_mode, @@ -34,7 +34,7 @@ process { } withName: '.*ANALYSE_TRANSCRIPTS:DROP_CONFIG_RUN_AS' { - ext.when = { params.switch_drop_as } + ext.when = { (!params.skip_drop_as) } publishDir = [ path: { "${params.outdir}/analyse_transcripts/drop/AS" }, mode: params.publish_dir_mode, @@ -43,7 +43,7 @@ process { } withName: '.*ANALYSE_TRANSCRIPTS:DROP_FILTER_RESULTS' { - ext.when = { params.switch_drop_ae | params.switch_drop_as } + ext.when = { (!params.skip_drop_ae) | (!params.skip_drop_as) } publishDir = [ path: { "${params.outdir}/analyse_transcripts/drop" }, mode: params.publish_dir_mode, @@ -52,7 +52,7 @@ process { } withName: '.*ANALYSE_TRANSCRIPTS:STRINGTIE_STRINGTIE' { - ext.when = { params.switch_stringtie } + ext.when = { (!params.skip_stringtie) } publishDir = [ path: { "${params.outdir}/analyse_transcripts" }, mode: params.publish_dir_mode, @@ -61,7 +61,7 @@ process { } withName: '.*ANALYSE_TRANSCRIPTS:GFFCOMPARE' { - ext.when = { params.switch_stringtie } + ext.when = { (!params.skip_stringtie) } publishDir = [ path: { "${params.outdir}/analyse_transcripts" }, mode: params.publish_dir_mode, diff --git a/conf/modules/annotate_snv.config b/conf/modules/annotate_snv.config index 
2b3c299e..e789b507 100644 --- a/conf/modules/annotate_snv.config +++ b/conf/modules/annotate_snv.config @@ -16,7 +16,7 @@ process { withName: '.*ANNOTATE_SNV:ENSEMBLVEP_VEP' { - ext.when = { params.switch_vep } + ext.when = { (!params.skip_vep) } ext.prefix = { "${vcf.simpleName}_vep" } ext.args = { [ '--dir_plugins vep_cache/Plugins', diff --git a/conf/modules/igv_tracks.config b/conf/modules/igv_tracks.config index a4dea5eb..11b8e3fb 100644 --- a/conf/modules/igv_tracks.config +++ b/conf/modules/igv_tracks.config @@ -16,16 +16,16 @@ process { withName: '.*IGV_TRACKS:UCSC_WIGTOBIGWIG' { - ext.when = { params.switch_build_tracks } + ext.when = { (!params.skip_build_tracks) } ext.args = { '-clip' } } withName: '.*IGV_TRACKS:JUNCTION_TRACK' { - ext.when = { params.switch_build_tracks } + ext.when = { (!params.skip_build_tracks) } } withName: '.*IGV_TRACKS:TABIX_BGZIPTABIX' { - ext.when = { params.switch_build_tracks } + ext.when = { (!params.skip_build_tracks) } ext.args2 = { '--preset bed' } ext.prefix = { "${meta.id}_junction" } publishDir = [ diff --git a/docs/usage.md b/docs/usage.md index 125e4339..681f576d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -123,7 +123,7 @@ If you would like to see more examples of what a typical samplesheet looks like In genomic-medicine-sweden/tomte, references can be supplied using parameters. We have also introduced the possiblility of using the `--igenomes_base` parameter to point to a path where genome specific reference files are placed (fasta, fai, gtf, star_index, salmon_index, subsample_bed). To make sure that the names of the reference files match those in your directory, check [igenomes.config](https://github.com/genomic-medicine-sweden/tomte/blob/master/conf/igenomes.config). -Note that the pipeline is modular in architecture. It offers you the flexibility to choose between different tools. For example, you can call SNVs either with BCFtools or with GATK. 
You also have the option to turn off sections of the pipeline if you do not want to run them. For example, drop aberrant expression module can be turned off by setting `--switch_drop_ae FALSE`. This flexibility means that in any given analysis run, a combination of tools included in the pipeline will not be executed. So the pipeline is written in a way that can account for these differences while working with reference parameters. If a tool is not going to be executed during the course of a run, parameters used only by that tool need not be provided. For example, if you are not running DROP aberrant splicing, you do not need to provide `--reference_drop_splice_folder`. +Note that the pipeline is modular in architecture. It offers you the flexibility to choose between different tools. For example, you can call SNVs either with BCFtools or with GATK. You also have the option to turn off sections of the pipeline if you do not want to run them. For example, drop aberrant expression module can be turned off by setting `--skip_drop_ae TRUE`. This flexibility means that in any given analysis run, a combination of tools included in the pipeline will not be executed. So the pipeline is written in a way that can account for these differences while working with reference parameters. If a tool is not going to be executed during the course of a run, parameters used only by that tool need not be provided. For example, if you are not running DROP aberrant splicing, you do not need to provide `--reference_drop_splice_folder`. genomic-medicine-sweden/tomte consists of several tools used for various purposes. For convenience, we have grouped those tools under the following categories: @@ -163,20 +163,20 @@ The mandatory and optional parameters for each category are tabulated below. ##### 2. 
Junction track and bigwig -| Mandatory | Optional | -| --------- | -------------------------------- | -| | switch_build_tracks 1 | +| Mandatory | Optional | +| --------- | ------------------------------ | +| | skip_build_tracks 1 | -1 If it is not provided by the user, the default value is true +1 If it is not provided by the user, the default value is false ##### 3. Subsample region -| Mandatory | Optional | -| ------------- | ----------------------------------- | -| subsample_bed | switch_subsample_region1 | -| | seed_frac2 | +| Mandatory | Optional | +| ------------- | --------------------------------- | +| subsample_bed | skip_subsample_region1 | +| | seed_frac2 | -1 If it is not provided by the user, the default value is true +1 If it is not provided by the user, the default value is false 2 If it is not provided by the user, the default value is 0.001 ##### 4. Variant calling - SNV @@ -191,25 +191,25 @@ The mandatory and optional parameters for each category are tabulated below. #### 5. SNV annotation (ensembl VEP) -| Mandatory | Optional | -| ---------------------------- | ---------------------- | -| vep_plugin_files1 | switch_vep2 | -| | vep_cache3 | -| | vep_cache_version | -| | vep_filters | +| Mandatory | Optional | +| ---------------------------- | --------------------- | +| vep_plugin_files1 | skip_vep2 | +| | vep_cache3 | +| | vep_cache_version | +| | vep_filters | 1 VEP caches can be downloaded [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html#cache). VEP plugins may be installed in the cache directory, and the plugin pLI is mandatory to install. To supply files required by VEP plugins, use `vep_plugin_files` parameter. See example cache [here](https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz).
-2 If it is not provided by the user, the default value is true
+2 If it is not provided by the user, the default value is false
3 If it is not provided by the user, the default value is 110, supported values are 107 and 110
#### 6. Stringtie & gffcompare -| Mandatory | Optional | -| --------- | ---------------------------- | -| fasta | switch_stringtie1 | -| gtf | | +| Mandatory | Optional | +| --------- | -------------------------- | +| fasta | skip_stringtie1 | +| gtf | | -1 If it is not provided by the user, the default value is true +1 If it is not provided by the user, the default value is false #### 7. DROP @@ -217,21 +217,21 @@ DROP - aberrant expression | Mandatory | Optional | | ------------------------------------- | --------------------------------- | -| reference_drop_annot_file1 | switch_drop_ae2 | +| reference_drop_annot_file1 | skip_drop_ae2 | | reference_drop_count_file | drop_group_samples_ae3 | | fasta | drop_padjcutoff_ae4 | | gtf | drop_zscorecutoff5 | | | gene_panel_clinical_filter | -| | switch_downsample6 | +| | skip_downsample6 | | | num_reads7 | | | genome8 | 1 To get more information on how to format it, see below
-2 If it is not provided by the user, the default value is true
+2 If it is not provided by the user, the default value is false
3 If it is not provided by the user, the default value is outrider
4 If it is not provided by the user, the default value is 0.05
5 If it is not provided by the user, the default value is 0
-6 If it is not provided by the user, the default value is true
+6 If it is not provided by the user, the default value is false
7 If it is not provided by the user, the default value is 120000000
8 If it is not provided by the user, the default value is GRCh38 @@ -239,19 +239,19 @@ DROP - aberrant splicing | Mandatory | Optional | | ------------------------------------- | --------------------------------- | -| reference_drop_annot_file1 | switch_drop_as2 | +| reference_drop_annot_file1 | skip_drop_as2 | | reference_drop_splice_folder | drop_group_samples_as3 | | | drop_padjcutoff_as4 | | | gene_panel_clinical_filter | -| | switch_downsample5 | +| | skip_downsample5 | | | num_reads6 | | | genome7 | 1 To get more information on how to format it, see below
-2 If it is not provided by the user, the default value is true
+2 If it is not provided by the user, the default value is false
3 If it is not provided by the user, the default value is fraser
4 If it is not provided by the user, the default value is 0.1
-5 If it is not provided by the user, the default value is true
+5 If it is not provided by the user, the default value is false
6 If it is not provided by the user, the default value is 120000000
7 If it is not provided by the user, the default value is GRCh38 diff --git a/nextflow.config b/nextflow.config index bd00b2e8..4d462a27 100644 --- a/nextflow.config +++ b/nextflow.config @@ -32,8 +32,8 @@ params { // Alignment star_two_pass_mode = 'Basic' - switch_subsample_region = true - switch_downsample = true + skip_subsample_region = false + skip_downsample = false num_reads = 120000000 seed_frac = 0.001 save_mapped_as_cram = true @@ -41,16 +41,16 @@ params { // Variant calling variant_caller = 'bcftools' bcftools_caller_mode = 'multiallelic' - switch_build_tracks = true - switch_stringtie = true - switch_drop_ae = true - switch_drop_as = true + skip_build_tracks = false + skip_stringtie = false + skip_drop_ae = false + skip_drop_as = false drop_group_samples_ae = 'outrider' drop_group_samples_as = 'fraser' drop_padjcutoff_ae = 0.05 drop_padjcutoff_as = 0.1 drop_zscorecutoff = 0 - switch_vep = true + skip_vep = false // Variant annotation vep_cache_version = 110 diff --git a/nextflow_schema.json b/nextflow_schema.json index 75c51bd0..7ad2c5f8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -205,16 +205,16 @@ "enum": ["Basic", "None"], "fa_icon": "fas fa-tachometer-alt" }, - "switch_subsample_region": { + "skip_subsample_region": { "type": "boolean", - "default": true, - "description": "Do you want to subsample the region subsample_bed to the fraction given in seed_frac?", + "default": false, + "description": "Should subsampling the region subsample_bed to the fraction given in seed_frac be skipped?", "fa_icon": "fas fa-toggle-off" }, - "switch_downsample": { + "skip_downsample": { "type": "boolean", - "default": true, - "description": "Do you want to downsample the number of reads to num_reads?", + "default": false, + "description": "Should downsampling the number of reads to num_reads be skipped?", "fa_icon": "fas fa-toggle-off" }, "subsample_bed": { @@ -260,34 +260,34 @@ "enum": ["consensus", "multiallelic"], "help_text": "Bcftools call can 
eitherbe run in multiallelic mode or in consensus mode. In consensus mode a p-value threshold of 0.01 is applied." }, - "switch_build_tracks": { + "skip_build_tracks": { "type": "boolean", - "default": true, - "description": "Should tracks be build?", + "default": false, + "description": "Should track building be skipped?", "fa_icon": "fas fa-toggle-off" }, - "switch_stringtie": { + "skip_stringtie": { "type": "boolean", - "default": true, - "description": "Should stringtie module be run?", + "default": false, + "description": "Should stringtie module be skipped?", "fa_icon": "fas fa-toggle-off" }, - "switch_vep": { + "skip_vep": { "type": "boolean", - "default": true, - "description": "Should VEP module be run?", + "default": false, + "description": "Should VEP module be skipped?", "fa_icon": "fas fa-toggle-off" }, - "switch_drop_ae": { + "skip_drop_ae": { "type": "boolean", - "default": true, - "description": "Should DROP Aberrant Expression module be run?", + "default": false, + "description": "Should DROP Aberrant Expression module be skipped?", "fa_icon": "fas fa-toggle-off" }, - "switch_drop_as": { + "skip_drop_as": { "type": "boolean", - "default": true, - "description": "Should DROP Aberrant Splicing module be run?", + "default": false, + "description": "Should DROP Aberrant Splicing module be skipped?", "fa_icon": "fas fa-toggle-off" }, "drop_group_samples_ae": { diff --git a/subworkflows/local/alignment.nf b/subworkflows/local/alignment.nf index ce6979ad..38066ab0 100644 --- a/subworkflows/local/alignment.nf +++ b/subworkflows/local/alignment.nf @@ -13,17 +13,17 @@ include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' workflow ALIGNMENT { take: - reads // channel: [mandatory] [ val(meta), [path(reads)] ] - star_index // channel: [mandatory] [ val(meta), path(star_index) ] - ch_gtf // channel: [mandatory] [ val(meta), path(gtf) ] - ch_platform // channel: [mandatory] [ val(platform) ] - subsample_bed // channel: [optional] [ 
path(subsample_bed) ] - seed_frac // parameter: [optional] default: 0.001 - num_reads // parameter: [optional] default: 120000000 - switch_subsample_region // parameter: [mandatory] default: true - switch_downsample // parameter: [mandatory] default: true - salmon_index // channel: [mandatory] [ path(salmon_index) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + reads // channel: [mandatory] [ val(meta), [path(reads)] ] + star_index // channel: [mandatory] [ val(meta), path(star_index) ] + ch_gtf // channel: [mandatory] [ val(meta), path(gtf) ] + ch_platform // channel: [mandatory] [ val(platform) ] + subsample_bed // channel: [optional] [ path(subsample_bed) ] + seed_frac // parameter: [optional] default: 0.001 + num_reads // parameter: [optional] default: 120000000 + skip_subsample_region // parameter: [mandatory] default: true + skip_downsample // parameter: [mandatory] default: true + salmon_index // channel: [mandatory] [ path(salmon_index) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] main: ch_versions = Channel.empty() @@ -43,11 +43,11 @@ workflow ALIGNMENT { ch_bam_bai = Channel.empty() ch_bam_bai_out = Channel.empty() - if (switch_subsample_region) { + if (!skip_subsample_region) { RNA_SUBSAMPLE_REGION( STAR_ALIGN.out.bam, subsample_bed, seed_frac) ch_bam_bai = ch_bam_bai.mix(RNA_SUBSAMPLE_REGION.out.bam_bai) ch_versions = ch_versions.mix(RNA_SUBSAMPLE_REGION.out.versions.first()) - if (!switch_downsample) { + if (skip_downsample) { ch_bam_bai_out = RNA_SUBSAMPLE_REGION.out.bam_bai } else { RNA_DOWNSAMPLE( ch_bam_bai, num_reads) @@ -56,7 +56,7 @@ workflow ALIGNMENT { } } else { ch_bam_bai = ch_bam_bai.mix(STAR_ALIGN.out.bam.join(SAMTOOLS_INDEX.out.bai)) - if (!switch_downsample) { + if (skip_downsample) { ch_bam_bai_out = STAR_ALIGN.out.bam.join(SAMTOOLS_INDEX.out.bai) } else { RNA_DOWNSAMPLE( ch_bam_bai, num_reads) diff --git a/subworkflows/local/analyse_transcripts.nf 
b/subworkflows/local/analyse_transcripts.nf index 1c6a1241..6ef6168d 100644 --- a/subworkflows/local/analyse_transcripts.nf +++ b/subworkflows/local/analyse_transcripts.nf @@ -27,6 +27,7 @@ workflow ANALYSE_TRANSCRIPTS { drop_zscorecutoff // parameter: [optional] default: 0 ch_gene_panel_clinical_filter // channel: [optional] [ path(tsv) ] case_info // channel: [optional] [ val(case_id) ] + skip_drop_ae // parameter: [mandatory] default: 'false' main: ch_versions = Channel.empty() @@ -83,7 +84,7 @@ workflow ANALYSE_TRANSCRIPTS { : Channel.empty() ch_out_drop_as_tsv = DROP_CONFIG_RUN_AS.out.drop_as_tsv ? DROP_CONFIG_RUN_AS.out.drop_as_tsv.collect() : Channel.empty() - ch_out_drop_gene_name = params.switch_drop_ae ? ch_out_drop_gene_name_ae : ch_out_drop_gene_name_as + ch_out_drop_gene_name = (!skip_drop_ae) ? ch_out_drop_gene_name_ae : ch_out_drop_gene_name_as DROP_FILTER_RESULTS( case_info, diff --git a/subworkflows/local/utils_nfcore_tomte_pipeline/main.nf b/subworkflows/local/utils_nfcore_tomte_pipeline/main.nf index 5fa61166..f2ab34e1 100644 --- a/subworkflows/local/utils_nfcore_tomte_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_tomte_pipeline/main.nf @@ -220,18 +220,18 @@ def toolCitationText() { "Tools used in the workflow included:", "BCFtools (Danecek et al. 2021),", "DROP (Yépez et al. 2021),", - params.switch_vep ? "EnsemblVEP (McLaren et al. 2016)," : "", + (!params.skip_vep) ? "EnsemblVEP (McLaren et al. 2016)," : "", "fastp (Chen et al. 2018),", "FastQC (Andrews 2010),", - params.switch_drop_as ? "FRASER (Mertes et al 2021)," : "", + (!params.skip_drop_as) ? "FRASER (Mertes et al 2021)," : "", "GATK (McKenna et al. 2010),", - params.switch_stringtie ? "GFFCompare (Pertea et al. 2020), StringTie (Pertea et al. 2015)," : "", + (!params.skip_stringtie) ? "GFFCompare (Pertea et al. 2020), StringTie (Pertea et al. 2015)," : "", "MultiQC (Ewels et al. 2016),", - params.switch_drop_ae ? "OUTRIDER (Brechtmann et al. 
2018)," : "", + (!params.skip_drop_ae) ? "OUTRIDER (Brechtmann et al. 2018)," : "", "SAMtools (Danecek et al. 2021),", "Salmon (Patro et al. 2017),", "STAR (Dobin et al. 2012),", - params.switch_build_tracks ? "UCSC tools (Kent et al. 2010)" : "", + (!params.skip_build_tracks) ? "UCSC tools (Kent et al. 2010)" : "", "." ].join(' ').trim() @@ -241,23 +241,23 @@ def toolCitationText() { def toolBibliographyText() { def reference_text = [ "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - params.switch_drop_ae ? "
  • Brechtmann F, Mertes C, Matusevičiūtė A, et al. OUTRIDER: A Statistical Method for Detecting Aberrantly Expressed Genes in RNA Sequencing Data. The American Journal of Human Genetics. 12 2018;103:907-917. doi:10.1016/J.AJHG.2018.10.025
  • " : "", + (!params.skip_drop_ae) ? "
  • Brechtmann F, Mertes C, Matusevičiūtė A, et al. OUTRIDER: A Statistical Method for Detecting Aberrantly Expressed Genes in RNA Sequencing Data. The American Journal of Human Genetics. 12 2018;103:907-917. doi:10.1016/J.AJHG.2018.10.025
  • " : "", "
  • Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics (Oxford, England). 9 2018;34:i884-i890. doi:10.1093/BIOINFORMATICS/BTY560
  • ", "
  • Dale R, Grüning B, Sjödin A, et al. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature methods. 7 2018;15:475-476. doi:10.1038/S41592-018-0046-7
  • ", "
  • Danecek P, Bonfield JK, Liddle J, et al. Twelve years of SAMtools and BCFtools. GigaScience. 1 2021;10:1-4. doi:10.1093/GIGASCIENCE/GIAB008
  • ", "
  • Dobin A, Davis CA, Schlesinger F, et al. STAR: ultrafast universal RNA-seq aligner. Bioinformatics. 10 2012;29:15-21. doi:10.1093/bioinformatics/bts635
  • ", "
  • Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics (Oxford, England). 10 2016;32:3047-3048. doi:10.1093/BIOINFORMATICS/BTW354
  • ", "
  • Ewels PA, Peltzer A, Fillinger S, et al. The nf-core framework for community-curated bioinformatics pipelines. Nature biotechnology. 3 2020;38:276-278. doi:10.1038/S41587-020-0439-X
  • ", - params.switch_build_tracks ? "
  • Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 9 2010;26:2204-2207. doi:10.1093/BIOINFORMATICS/BTQ351
  • " : "", + (!params.skip_build_tracks) ? "
  • Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 9 2010;26:2204-2207. doi:10.1093/BIOINFORMATICS/BTQ351
  • " : "", "
  • Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PloS one. 5 2017;12. doi:10.1371/JOURNAL.PONE.0177459
  • ", "
  • Leprevost FDV, Grüning BA, Aflitos SA, et al. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England). 8 2017;33:2580-2582. doi:10.1093/BIOINFORMATICS/BTX192
  • ", "
  • McKenna A, Hanna M, Banks E, et al. The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data. Genome Research. 9 2010;20:1297-1303. doi:10.1101/GR.107524.110
  • ", - params.switch_vep ? "
  • McLaren W, Gil L, Hunt SE, et al. The Ensembl Variant Effect Predictor. Genome biology. 6 2016;17. doi:10.1186/S13059-016-0974-4
  • " : "", + (!params.skip_vep) ? "
  • McLaren W, Gil L, Hunt SE, et al. The Ensembl Variant Effect Predictor. Genome biology. 6 2016;17. doi:10.1186/S13059-016-0974-4
  • " : "", "
  • MerkelDirk. Docker. Linux Journal. Published online 3 2014. doi:10.5555/2600239.2600241
  • ", - params.switch_drop_as ? "
  • Mertes C, Scheller IF, Yépez VA, et al. Detection of aberrant splicing events in RNA-seq data using FRASER. Nature Communications 2021 12:1. 1 2021;12:1-13. doi:10.1038/s41467-020-20573-7
  • " : "", + (!params.skip_drop_as) ? "
  • Mertes C, Scheller IF, Yépez VA, et al. Detection of aberrant splicing events in RNA-seq data using FRASER. Nature Communications 2021 12:1. 1 2021;12:1-13. doi:10.1038/s41467-020-20573-7
  • " : "", "
  • Patro R, Duggal G, Love MI, Irizarry RA, Kingsford C. Salmon provides fast and bias-aware quantification of transcript expression. Nature methods. 2017;14:417-419. doi:10.1038/NMETH.4197
  • ", - params.switch_stringtie ? "
  • Pertea M, Pertea G. GFF Utilities: GffRead and GffCompare. F1000Research. 9 2020;9:304. doi:10.12688/F1000RESEARCH.23297.1
  • " : "", - params.switch_stringtie ? "
  • Pertea M, Pertea GM, Antonescu CM, Chang TC, Mendell JT, Salzberg SL. StringTie enables improved reconstruction of a transcriptome from RNA-seq reads. Nature biotechnology. 2015;33:290-295. doi:10.1038/NBT.3122
  • ": "" + (!params.skip_stringtie) ? "
  • Pertea M, Pertea G. GFF Utilities: GffRead and GffCompare. F1000Research. 9 2020;9:304. doi:10.12688/F1000RESEARCH.23297.1
  • " : "", + (!params.skip_stringtie) ? "
  • Pertea M, Pertea GM, Antonescu CM, Chang TC, Mendell JT, Salzberg SL. StringTie enables improved reconstruction of a transcriptome from RNA-seq reads. Nature biotechnology. 2015;33:290-295. doi:10.1038/NBT.3122
  • ": "" ].join(' ').trim() return reference_text diff --git a/workflows/tomte.nf b/workflows/tomte.nf index e5ebee68..b1e12864 100644 --- a/workflows/tomte.nf +++ b/workflows/tomte.nf @@ -124,8 +124,8 @@ workflow TOMTE { ch_subsample_bed, params.seed_frac, params.num_reads, - params.switch_subsample_region, - params.switch_downsample, + params.skip_subsample_region, + params.skip_downsample, ch_references.salmon_index, ch_references.fasta ).set { ch_alignment } @@ -156,6 +156,7 @@ workflow TOMTE { params.drop_zscorecutoff, ch_gene_panel_clinical_filter, ch_case_info + params.skip_drop_ae ) ch_versions = ch_versions.mix(ANALYSE_TRANSCRIPTS.out.versions) From 18d515ce69ad867dfd9661eda54430cb0faf6329 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 10 Apr 2024 10:23:58 +0200 Subject: [PATCH 19/45] fix comma --- workflows/tomte.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/tomte.nf b/workflows/tomte.nf index b1e12864..3c08e382 100644 --- a/workflows/tomte.nf +++ b/workflows/tomte.nf @@ -155,7 +155,7 @@ workflow TOMTE { params.drop_padjcutoff_as, params.drop_zscorecutoff, ch_gene_panel_clinical_filter, - ch_case_info + ch_case_info, params.skip_drop_ae ) ch_versions = ch_versions.mix(ANALYSE_TRANSCRIPTS.out.versions) From aa440720f53f6d68593b4802a98a61a6e6978ca4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Thu, 11 Apr 2024 08:03:06 +0200 Subject: [PATCH 20/45] Apply suggestions from code review Co-authored-by: Anders Jemt --- docs/usage.md | 2 +- nextflow_schema.json | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 681f576d..6c02ee7b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -123,7 +123,7 @@ If you would like to see more examples of what a typical samplesheet looks like In genomic-medicine-sweden/tomte, references can be supplied using parameters. 
We have also introduced the possiblility of using the `--igenomes_base` parameter to point to a path where genome specific reference files are placed (fasta, fai, gtf, star_index, salmon_index, subsample_bed). To make sure that the names of the reference files match those in your directory, check [igenomes.config](https://github.com/genomic-medicine-sweden/tomte/blob/master/conf/igenomes.config). -Note that the pipeline is modular in architecture. It offers you the flexibility to choose between different tools. For example, you can call SNVs either with BCFtools or with GATK. You also have the option to turn off sections of the pipeline if you do not want to run them. For example, drop aberrant expression module can be turned off by setting `--skip_drop_ae TRUE`. This flexibility means that in any given analysis run, a combination of tools included in the pipeline will not be executed. So the pipeline is written in a way that can account for these differences while working with reference parameters. If a tool is not going to be executed during the course of a run, parameters used only by that tool need not be provided. For example, if you are not running DROP aberrant splicing, you do not need to provide `--reference_drop_splice_folder`. +Note that the pipeline is modular in architecture. It offers you the flexibility to choose between different tools. For example, you can call SNVs either with BCFtools or with GATK. You also have the option to turn off sections of the pipeline if you do not want to run them. For example, drop aberrant expression module can be turned off by setting `--skip_drop_ae true`. This flexibility means that in any given analysis run, a combination of tools included in the pipeline will not be executed. So the pipeline is written in a way that can account for these differences while working with reference parameters. If a tool is not going to be executed during the course of a run, parameters used only by that tool need not be provided. 
For example, if you are not running DROP aberrant splicing, you do not need to provide `--reference_drop_splice_folder`. genomic-medicine-sweden/tomte consists of several tools used for various purposes. For convenience, we have grouped those tools under the following categories: diff --git a/nextflow_schema.json b/nextflow_schema.json index 7ad2c5f8..14e33be1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -208,13 +208,13 @@ "skip_subsample_region": { "type": "boolean", "default": false, - "description": "Should subsampling the region subsample_bed to the fraction given in seed_frac be skipped?", + "description": "Turn off subsampling of the region. The region is defined by the subsample_bed parameter and the fraction is given by seed_frac", "fa_icon": "fas fa-toggle-off" }, "skip_downsample": { "type": "boolean", "default": false, - "description": "Should downsampling the number of reads to num_reads be skipped?", + "description": "Skip downsampling before expression/splicing analysis. 
The number of reads to be used is defined by num_reads.", "fa_icon": "fas fa-toggle-off" }, "subsample_bed": { @@ -263,31 +263,31 @@ "skip_build_tracks": { "type": "boolean", "default": false, - "description": "Should track building be skipped?", + "description": "Skip building splice junction tracks for IGV.", "fa_icon": "fas fa-toggle-off" }, "skip_stringtie": { "type": "boolean", "default": false, - "description": "Should stringtie module be skipped?", + "description": "Skip analysis with StringTie", "fa_icon": "fas fa-toggle-off" }, "skip_vep": { "type": "boolean", "default": false, - "description": "Should VEP module be skipped?", + "description": "Skip Ensembl Variant Effect Predictor", "fa_icon": "fas fa-toggle-off" }, "skip_drop_ae": { "type": "boolean", "default": false, - "description": "Should DROP Aberrant Expression module be skipped?", + "description": "Skip DROP Aberrant Expression module ", "fa_icon": "fas fa-toggle-off" }, "skip_drop_as": { "type": "boolean", "default": false, - "description": "Should DROP Aberrant Splicing module be skipped?", + "description": "Skip DROP Aberrant Splicing module", "fa_icon": "fas fa-toggle-off" }, "drop_group_samples_ae": { From b8946e8627ec2ab4a549811b1eea3b4941e43879 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 17 Apr 2024 12:35:28 +0200 Subject: [PATCH 21/45] removed vep_filters parameter will now be generated from clinical panel --- CHANGELOG.md | 7 ++ conf/modules/annotate_snv.config | 36 +++++++++- conf/test.config | 1 - docs/output.md | 6 +- docs/usage.md | 12 ++-- main.nf | 1 - modules.json | 10 +++ modules/local/create_hgncids_file.nf | 61 +++++++++++++++++ .../ensemblvep/filtervep/environment.yml | 7 ++ modules/nf-core/ensemblvep/filtervep/main.nf | 50 ++++++++++++++ modules/nf-core/ensemblvep/filtervep/meta.yml | 46 +++++++++++++ modules/nf-core/gawk/environment.yml | 7 ++ modules/nf-core/gawk/main.nf | 54 +++++++++++++++ modules/nf-core/gawk/meta.yml | 50 ++++++++++++++ 
modules/nf-core/gawk/tests/main.nf.test | 56 +++++++++++++++ modules/nf-core/gawk/tests/main.nf.test.snap | 68 +++++++++++++++++++ modules/nf-core/gawk/tests/nextflow.config | 6 ++ .../tests/nextflow_with_program_file.config | 5 ++ modules/nf-core/gawk/tests/tags.yml | 2 + nextflow_schema.json | 9 +-- subworkflows/local/analyse_transcripts.nf | 2 +- subworkflows/local/annotate_snv.nf | 68 +++++++++++++++---- workflows/tomte.nf | 3 +- 23 files changed, 532 insertions(+), 35 deletions(-) create mode 100644 modules/local/create_hgncids_file.nf create mode 100644 modules/nf-core/ensemblvep/filtervep/environment.yml create mode 100644 modules/nf-core/ensemblvep/filtervep/main.nf create mode 100644 modules/nf-core/ensemblvep/filtervep/meta.yml create mode 100644 modules/nf-core/gawk/environment.yml create mode 100644 modules/nf-core/gawk/main.nf create mode 100644 modules/nf-core/gawk/meta.yml create mode 100644 modules/nf-core/gawk/tests/main.nf.test create mode 100644 modules/nf-core/gawk/tests/main.nf.test.snap create mode 100644 modules/nf-core/gawk/tests/nextflow.config create mode 100644 modules/nf-core/gawk/tests/nextflow_with_program_file.config create mode 100644 modules/nf-core/gawk/tests/tags.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index a1cd7546..59e29cf2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Parameter has been removed if new parameter information isn't present. 
- Added better documentation on subworkflow input [#101](https://github.com/genomic-medicine-sweden/tomte/pull/101) - Added option to add extra arguments to DROP aberrant expression and aberrant splicing [#104](https://github.com/genomic-medicine-sweden/tomte/pull/104) - Added a function to branch references into compressed/uncompressed [#107](https://github.com/genomic-medicine-sweden/tomte/pull/107) +- Added local module create_hgncids_file.nf and nf-core module filter vep to create a clinical vcf [#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) ### `Fixed` @@ -29,9 +30,15 @@ Parameter has been removed if new parameter information isn't present. - Updated GATK4_ASEREADCOUNTER, now bam and vcf will be given as one channel [#103](https://github.com/genomic-medicine-sweden/tomte/pull/103) - Prepare reference subworkflow has been reformated and simplified [#105](https://github.com/genomic-medicine-sweden/tomte/pull/105) - FastQC have been updated to correctly allocate memory [#106](https://github.com/genomic-medicine-sweden/tomte/pull/106) +- vep_hgnc is now extracted from gene_panel_clinical_filter [#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) ### `Parameters` +- Removed `--vep_filters`, it will now be automatically extracted from the `--gene_panel_clinical_filter`[#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) + | Old parameter | New parameter | + | --------------------------- | ------------------------- | + | `--vep_filters` | | + - Updated parameter names to make their use easier and more clear, changing the names from `switch` to `skip` and their default value from `true` to `false` [#108](https://github.com/genomic-medicine-sweden/tomte/pull/108) | Old parameter | New parameter | diff --git a/conf/modules/annotate_snv.config b/conf/modules/annotate_snv.config index e789b507..d816fc57 100644 --- a/conf/modules/annotate_snv.config +++ b/conf/modules/annotate_snv.config @@ -37,7 +37,41 @@ process { saveAs: { 
filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*ANNOTATE_SNV:TABIX_VEP' { + + withName: '.*ANNOTATE_SNV:RENAME_FILES' { + ext.when = { (!params.skip_vep) } + ext.prefix = { "${meta.id}_vep_${meta.set}" } + } + + withName: '.*ANNOTATE_SNV:TABIX_TABIX' { + ext.when = { (!params.skip_vep) } + ext.prefix = { "${meta.id}_vep_${meta.set}" } + publishDir = [ + path: { "${params.outdir}/annotate_vep" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ANNOTATE_SNV:GAWK' { + ext.when = { (!params.skip_vep) } + ext.args2 = { '\'!/^#/ {print $4}\'' } + } + + withName: '.*ANNOTATE_SNV:ENSEMBLVEP_FILTERVEP' { + ext.when = { (!params.skip_vep && params.gene_panel_clinical_filter) } + ext.prefix = { "${meta.id}_vep_${meta.set}" } + ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } + publishDir = [ + path: { "${params.outdir}/annotate_vep" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*ANNOTATE_SNV:TABIX_BGZIPTABIX' { + ext.when = { (!params.skip_vep && params.gene_panel_clinical_filter) } + ext.prefix = { "${meta.id}_vep_${meta.set}" } publishDir = [ path: { "${params.outdir}/annotate_vep" }, mode: params.publish_dir_mode, diff --git a/conf/test.config b/conf/test.config index eeb61a26..0dee458a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -36,7 +36,6 @@ params { // VEP vep_cache = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz" - vep_filters = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt" vep_cache_version = 107 vep_plugin_files = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_files.csv" diff --git a/docs/output.md b/docs/output.md index 5e24b011..a55e9efd 100644 --- a/docs/output.md +++ b/docs/output.md @@ -212,8 +212,10 @@ to "gatk". Involves several steps: [`SplitN Cigar Reads`](https://gatk.broadinst Output files - `annotate_vep` - - `*ase_vep.vcf.gz`: annotated vcf - - `*ase_vep.vcf.gz.tbi`: index for annotated vcf + - `*vep_research.vcf.gz`: annotated vcf containing all snvs + - `*vep_research.vcf.gz.tbi`: index for annotated vcf containing all snvs + - `*vep_clinical.vcf.gz`: annotated vcf filtered according to genes provided by gene_panel_clinical_filter + - `*vep_clinical.vcf.gz.tbi`: index for annotated vcf filtered according to genes provided by gene_panel_clinical_filter diff --git a/docs/usage.md b/docs/usage.md index 6c02ee7b..d10b7af9 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -191,12 +191,12 @@ The mandatory and optional parameters for each category are tabulated below. #### 5.
SNV annotation (ensembl VEP) -| Mandatory | Optional | -| ---------------------------- | --------------------- | -| vep_plugin_files1 | skip_vep2 | -| | vep_cache3 | -| | vep_cache_version | -| | vep_filters | +| Mandatory | Optional | +| ---------------------------- | -------------------------- | +| vep_plugin_files1 | skip_vep2 | +| | vep_cache3 | +| | vep_cache_version | +| | gene_panel_clinical_filter | 1 VEP caches can be downloaded [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html#cache). VEP plugins may be installed in the cache directory, and the plugin pLI is mandatory to install. To supply files required by VEP plugins, use `vep_plugin_files` parameter. See example cache [here](https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz).
    2 If it is not provided by the user, the default value is false
    diff --git a/main.nf b/main.nf index c1cca78c..207e9b89 100644 --- a/main.nf +++ b/main.nf @@ -39,7 +39,6 @@ params.star_index = getGenomeAttribute('star_index') params.salmon_index = getGenomeAttribute('salmon_index') params.transcript_fasta = getGenomeAttribute('transcript_fasta') params.vep_cache = getGenomeAttribute('vep_cache') -params.vep_filters = getGenomeAttribute('vep_filters') params.vep_plugin_files = getGenomeAttribute('vep_plugin_files') /* diff --git a/modules.json b/modules.json index 74eae697..5c4d40a7 100644 --- a/modules.json +++ b/modules.json @@ -36,6 +36,11 @@ "git_sha": "0997b47c93c06b49aa7b3fefda87e728312cf2ca", "installed_by": ["modules"] }, + "ensemblvep/filtervep": { + "branch": "master", + "git_sha": "1e6b8886d30f7a7129dcd1ff46ef3a20038294d5", + "installed_by": ["modules"] + }, "ensemblvep/vep": { "branch": "master", "git_sha": "76a0696a60c41c57fc5f6040ac31b11ce5d4d8dd", @@ -81,6 +86,11 @@ "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, + "gawk": { + "branch": "master", + "git_sha": "da4d05d04e65227d4307e87940842f1a14de62c7", + "installed_by": ["modules"] + }, "gffcompare": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/local/create_hgncids_file.nf b/modules/local/create_hgncids_file.nf new file mode 100644 index 00000000..a6e1abdb --- /dev/null +++ b/modules/local/create_hgncids_file.nf @@ -0,0 +1,61 @@ +process CREATE_HGNCIDS_FILE { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::python=3.8.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'biocontainers/python:3.8.3' }" + + input: + tuple val(meta), path(input) + + output: + path("*_reformatted.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + python3 < versions.yml + "${task.process}": + create_hgncids_file: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + python3 < versions.yml + "${task.process}": + create_hgncids_file: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ensemblvep/filtervep/environment.yml b/modules/nf-core/ensemblvep/filtervep/environment.yml new file mode 100644 index 00000000..07cb9dba --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/environment.yml @@ -0,0 +1,7 @@ +name: ensemblvep_filtervep +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=111.0 diff --git a/modules/nf-core/ensemblvep/filtervep/main.nf b/modules/nf-core/ensemblvep/filtervep/main.nf new file mode 100644 index 00000000..7eec3cb7 --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/main.nf @@ -0,0 +1,50 @@ +process ENSEMBLVEP_FILTERVEP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ensembl-vep:111.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:111.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), path(input) + path (feature_file) + + output: + tuple val(meta), path("*.${extension}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: "vcf" + """ + filter_vep \\ + $args \\ + --input_file $input \\ + --output_file ${prefix}.${extension} \\ + --only_matched + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: "vcf" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} + diff --git a/modules/nf-core/ensemblvep/filtervep/meta.yml b/modules/nf-core/ensemblvep/filtervep/meta.yml new file mode 100644 index 00000000..bde3aa16 --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/meta.yml @@ -0,0 +1,46 @@ +name: ensemblvep_filtervep +description: Filter variants based on Ensembl Variant Effect Predictor (VEP) annotations. +keywords: + - annotation + - vcf + - tab + - filter +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. 
+ homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - input: + type: file + description: VCF/TAB file annotated with vep + pattern: "*.{vcf,tab,tsv,txt}" + - feature_file: + type: file + description: File containing features on separate lines. To be used with --filter option. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: VCF/TAB file + pattern: "*.{vcf,tab,txt,tsv}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/gawk/environment.yml b/modules/nf-core/gawk/environment.yml new file mode 100644 index 00000000..34513c7f --- /dev/null +++ b/modules/nf-core/gawk/environment.yml @@ -0,0 +1,7 @@ +name: gawk +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - anaconda::gawk=5.1.0 diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf new file mode 100644 index 00000000..f856a1f8 --- /dev/null +++ b/modules/nf-core/gawk/main.nf @@ -0,0 +1,54 @@ +process GAWK { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'biocontainers/gawk:5.1.0' }" + + input: + tuple val(meta), path(input) + path(program_file) + + output: + tuple val(meta), path("${prefix}.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" + + program = program_file ? "-f ${program_file}" : "${args2}" + + """ + awk \\ + ${args} \\ + ${program} \\ + ${input} \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension}" + + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml new file mode 100644 index 00000000..2b6033b0 --- /dev/null +++ b/modules/nf-core/gawk/meta.yml @@ -0,0 +1,50 @@ +name: "gawk" +description: | + If you are like many computer users, you would frequently like to make changes in various text files + wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest. + The job is easy with awk, especially the GNU implementation gawk. 
+keywords: + - gawk + - awk + - txt + - text + - file parsing +tools: + - "gawk": + description: "GNU awk" + homepage: "https://www.gnu.org/software/gawk/" + documentation: "https://www.gnu.org/software/gawk/manual/" + tool_dev_url: "https://www.gnu.org/prep/ftp.html" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: The input file - Specify the logic that needs to be executed on this file on the `ext.args2` or in the program file + pattern: "*" + - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't wish to use a file, you can use `ext.args2` to specify the logic. + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: The output file - specify the name of this file using `ext.prefix` and the extension using `ext.suffix` + pattern: "*" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/gawk/tests/main.nf.test b/modules/nf-core/gawk/tests/main.nf.test new file mode 100644 index 00000000..fce82ca9 --- /dev/null +++ b/modules/nf-core/gawk/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process GAWK" + script "../main.nf" + process "GAWK" + + tag "modules" + tag "modules_nfcore" + tag "gawk" + + test("convert fasta to bed") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("convert 
fasta to bed with program file") { + config "./nextflow_with_program_file.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = Channel.of('BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}').collectFile(name:"program.txt") + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/main.nf.test.snap b/modules/nf-core/gawk/tests/main.nf.test.snap new file mode 100644 index 00000000..ce207478 --- /dev/null +++ b/modules/nf-core/gawk/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "convert fasta to bed with program file": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,4c320d8c98ca80690afd7651da1ba520" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,4c320d8c98ca80690afd7651da1ba520" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-04-05T11:00:28.097563" + }, + "convert fasta to bed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,4c320d8c98ca80690afd7651da1ba520" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,4c320d8c98ca80690afd7651da1ba520" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-04-05T10:28:15.625869" + } +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/nextflow.config b/modules/nf-core/gawk/tests/nextflow.config new file mode 100644 index 00000000..6e5d43a3 --- /dev/null +++ 
b/modules/nf-core/gawk/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: GAWK { + ext.suffix = "bed" + ext.args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' + } +} diff --git a/modules/nf-core/gawk/tests/nextflow_with_program_file.config b/modules/nf-core/gawk/tests/nextflow_with_program_file.config new file mode 100644 index 00000000..693ad419 --- /dev/null +++ b/modules/nf-core/gawk/tests/nextflow_with_program_file.config @@ -0,0 +1,5 @@ +process { + withName: GAWK { + ext.suffix = "bed" + } +} diff --git a/modules/nf-core/gawk/tests/tags.yml b/modules/nf-core/gawk/tests/tags.yml new file mode 100644 index 00000000..72e4531d --- /dev/null +++ b/modules/nf-core/gawk/tests/tags.yml @@ -0,0 +1,2 @@ +gawk: + - "modules/nf-core/gawk/**" diff --git a/nextflow_schema.json b/nextflow_schema.json index 14e33be1..2a0c7fda 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -153,13 +153,6 @@ "fa_icon": "fas fa-folder-open", "enum": [107, 110] }, - "vep_filters": { - "type": "string", - "format": "path", - "fa_icon": "fas fa-file-csv", - "description": "File containing HGNC_IDs of interest on separate lines.", - "hidden": true - }, "vep_plugin_files": { "type": "string", "exists": true, @@ -341,7 +334,7 @@ }, "gene_panel_clinical_filter": { "type": "string", - "description": "If you are running DROP, tsv file containing genes on which results will be filtered to avoid incidental findings", + "description": "tsv file containing genes on which results from drop and vep will be filtered to avoid incidental findings, the fourth column must contain hgnc ids", "fa_icon": "fas fa-file" } } diff --git a/subworkflows/local/analyse_transcripts.nf b/subworkflows/local/analyse_transcripts.nf index 6ef6168d..0bfdd250 100644 --- a/subworkflows/local/analyse_transcripts.nf +++ b/subworkflows/local/analyse_transcripts.nf @@ -88,7 +88,7 @@ workflow ANALYSE_TRANSCRIPTS { DROP_FILTER_RESULTS( case_info, - ch_gene_panel_clinical_filter, + 
ch_gene_panel_clinical_filter.ifEmpty([]), ch_out_drop_ae_rds.ifEmpty([]), ch_out_drop_gene_name, ch_out_drop_as_tsv.ifEmpty([]) diff --git a/subworkflows/local/annotate_snv.nf b/subworkflows/local/annotate_snv.nf index 5f2256d2..be51f855 100644 --- a/subworkflows/local/annotate_snv.nf +++ b/subworkflows/local/annotate_snv.nf @@ -2,18 +2,25 @@ // Annotating SNVs // -include { ENSEMBLVEP_VEP } from '../../modules/nf-core/ensemblvep/vep/main' -include { TABIX_TABIX as TABIX_VEP } from '../../modules/nf-core/tabix/tabix/main' +include { ENSEMBLVEP_VEP } from '../../modules/nf-core/ensemblvep/vep/main' +include { RENAME_FILES } from '../../modules/local/rename_files' +include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' +include { GAWK } from '../../modules/nf-core/gawk/main' +include { CREATE_HGNCIDS_FILE } from '../../modules/local/create_hgncids_file.nf' +include { ENSEMBLVEP_FILTERVEP } from '../../modules/nf-core/ensemblvep/filtervep/main' + workflow ANNOTATE_SNV { take: - vcf // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] - val_vep_genome // parameter: [mandatory] 'GRCh37' or 'GRCh38' - val_vep_cache_version // parameter: [mandatory] default: 110 - ch_vep_cache // channel: [mandatory] [ path(cache) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_vep_extra_files // channel: [mandatory] [ path(files) ] + vcf // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] + val_vep_genome // parameter: [mandatory] 'GRCh37' or 'GRCh38' + val_vep_cache_version // parameter: [mandatory] default: 110 + ch_vep_cache // channel: [mandatory] [ path(cache) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_vep_extra_files // channel: [mandatory] [ path(files) ] + ch_gene_panel_clinical_filter // channel: [optional] [ path(file) ] main: ch_versions = Channel.empty() @@ -28,17 +35,52 @@ workflow ANNOTATE_SNV { ch_fasta, 
ch_vep_extra_files ) - ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions.first()) - TABIX_VEP(ENSEMBLVEP_VEP.out.vcf) - ch_versions = ch_versions.mix(TABIX_VEP.out.versions.first()) + ENSEMBLVEP_VEP.out.vcf + .multiMap { meta, vcf -> + clinical: [ meta + [ set: "clinical" ], vcf ] + research: [ meta + [ set: "research" ], vcf ] + } + .set { ch_clin_research_vcf } + + RENAME_FILES( ch_clin_research_vcf.research ) + + TABIX_TABIX( RENAME_FILES.out.output ) + ch_vcf_research = RENAME_FILES.out.output.join(TABIX_TABIX.out.tbi) + + GAWK( ch_gene_panel_clinical_filter.map{it -> [[id:'hgnc'], it]}.collect(), + [] ) + + // Generate Clinical filter + CREATE_HGNCIDS_FILE( GAWK.out.output ) + .txt + .set {ch_hgnc_ids} + + //Filter results + ENSEMBLVEP_FILTERVEP( + ch_clin_research_vcf.clinical, + ch_hgnc_ids + ) + .output + .set { ch_filtervep_out } + + TABIX_BGZIPTABIX( ch_filtervep_out ) + ch_vcf_clin = TABIX_BGZIPTABIX.out.gz_tbi + + ch_versions = ch_versions.mix( ENSEMBLVEP_VEP.out.versions.first() ) + ch_versions = ch_versions.mix( GAWK.out.versions ) + ch_versions = ch_versions.mix( ENSEMBLVEP_FILTERVEP.out.versions ) + ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) + ch_versions = ch_versions.mix( TABIX_TABIX.out.versions ) emit: - vcf_gz = ENSEMBLVEP_VEP.out.vcf // channel: [ val(meta), path(vcf.gz) ] - tbi_gz = TABIX_VEP.out.tbi // channel: [ val(meta), path(tbi) ] + //vcf_gz = ENSEMBLVEP_VEP.out.vcf // channel: [ val(meta), path(vcf.gz) ] + //tbi_gz = TABIX_VEP.out.tbi // channel: [ val(meta), path(tbi) ] tab_gz = ENSEMBLVEP_VEP.out.tab // channel: [ val(meta), path(tab.gz) ] json_gz = ENSEMBLVEP_VEP.out.json // channel: [ val(meta), path(json.gz) ] report = ENSEMBLVEP_VEP.out.report // channel: [ path(html) ] + ch_vcf_clin = ch_vcf_clin // channel: [ val(meta), path(vcf.gz) path(tbi)] + ch_vcf_research = ch_vcf_research // channel: [ val(meta), path(vcf.gz) path(tbi)] versions = ch_versions // channel: [ path(versions.yml) ] } diff --git 
a/workflows/tomte.nf b/workflows/tomte.nf index 3c08e382..c4aa5e2c 100644 --- a/workflows/tomte.nf +++ b/workflows/tomte.nf @@ -82,8 +82,6 @@ workflow TOMTE { : Channel.empty() ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect() : Channel.value([]) - ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect() - : Channel.value([]) // Read and store paths in the vep_plugin_files file @@ -187,6 +185,7 @@ workflow TOMTE { ch_references.vep_cache, ch_references.fasta, ch_vep_extra_files, + ch_gene_panel_clinical_filter ) ch_versions = ch_versions.mix(ANNOTATE_SNV.out.versions) From 638f80cf5a4f1e6cbcf9438f978843f9f85c1c0b Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 17 Apr 2024 12:39:50 +0200 Subject: [PATCH 22/45] fix linting --- conf/modules/annotate_snv.config | 2 +- subworkflows/local/annotate_snv.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules/annotate_snv.config b/conf/modules/annotate_snv.config index d816fc57..7ca1ea31 100644 --- a/conf/modules/annotate_snv.config +++ b/conf/modules/annotate_snv.config @@ -37,7 +37,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - + withName: '.*ANNOTATE_SNV:RENAME_FILES' { ext.when = { (!params.skip_vep) } ext.prefix = { "${meta.id}_vep_${meta.set}" } diff --git a/subworkflows/local/annotate_snv.nf b/subworkflows/local/annotate_snv.nf index be51f855..43fb10e8 100644 --- a/subworkflows/local/annotate_snv.nf +++ b/subworkflows/local/annotate_snv.nf @@ -8,7 +8,7 @@ include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/ma include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' include { GAWK } from '../../modules/nf-core/gawk/main' include { CREATE_HGNCIDS_FILE } from '../../modules/local/create_hgncids_file.nf' -include { ENSEMBLVEP_FILTERVEP } from '../../modules/nf-core/ensemblvep/filtervep/main' +include { ENSEMBLVEP_FILTERVEP } from '../../modules/nf-core/ensemblvep/filtervep/main' @@ -42,7 +42,7 @@ workflow ANNOTATE_SNV { research: [ meta + [ set: "research" ], vcf ] } .set { ch_clin_research_vcf } - + RENAME_FILES( ch_clin_research_vcf.research ) TABIX_TABIX( RENAME_FILES.out.output ) From 057e9a9fb1d17ea6caf4cc3d1164b57f739c721b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 17 Apr 2024 14:53:02 +0200 Subject: [PATCH 23/45] Apply suggestions from code review --- subworkflows/local/annotate_snv.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/subworkflows/local/annotate_snv.nf b/subworkflows/local/annotate_snv.nf index 43fb10e8..28040c8d 100644 --- a/subworkflows/local/annotate_snv.nf +++ b/subworkflows/local/annotate_snv.nf @@ -74,8 +74,6 @@ workflow ANNOTATE_SNV { ch_versions = ch_versions.mix( TABIX_TABIX.out.versions ) emit: - //vcf_gz = ENSEMBLVEP_VEP.out.vcf // channel: [ val(meta), path(vcf.gz) ] - //tbi_gz = TABIX_VEP.out.tbi // channel: [ val(meta), path(tbi) ] tab_gz = ENSEMBLVEP_VEP.out.tab // channel: [ val(meta), path(tab.gz) ] json_gz = ENSEMBLVEP_VEP.out.json // channel: [ val(meta), path(json.gz) ] report = ENSEMBLVEP_VEP.out.report // channel: [ path(html) ] From 
0b609192257bab790a998b7df6a235ff62116028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 17 Apr 2024 14:53:14 +0200 Subject: [PATCH 24/45] Update subworkflows/local/annotate_snv.nf --- subworkflows/local/annotate_snv.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/annotate_snv.nf b/subworkflows/local/annotate_snv.nf index 28040c8d..a65627a6 100644 --- a/subworkflows/local/annotate_snv.nf +++ b/subworkflows/local/annotate_snv.nf @@ -74,11 +74,11 @@ workflow ANNOTATE_SNV { ch_versions = ch_versions.mix( TABIX_TABIX.out.versions ) emit: - tab_gz = ENSEMBLVEP_VEP.out.tab // channel: [ val(meta), path(tab.gz) ] - json_gz = ENSEMBLVEP_VEP.out.json // channel: [ val(meta), path(json.gz) ] - report = ENSEMBLVEP_VEP.out.report // channel: [ path(html) ] - ch_vcf_clin = ch_vcf_clin // channel: [ val(meta), path(vcf.gz) path(tbi)] - ch_vcf_research = ch_vcf_research // channel: [ val(meta), path(vcf.gz) path(tbi)] - versions = ch_versions // channel: [ path(versions.yml) ] + tab_gz = ENSEMBLVEP_VEP.out.tab // channel: [ val(meta), path(tab.gz) ] + json_gz = ENSEMBLVEP_VEP.out.json // channel: [ val(meta), path(json.gz) ] + report = ENSEMBLVEP_VEP.out.report // channel: [ path(html) ] + ch_vcf_clin = ch_vcf_clin // channel: [ val(meta), path(vcf.gz) path(tbi)] + ch_vcf_research = ch_vcf_research // channel: [ val(meta), path(vcf.gz) path(tbi)] + versions = ch_versions // channel: [ path(versions.yml) ] } From 09d8fefd65188cbb9df69ada438c850aa0797ea9 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 17 Apr 2024 15:47:04 +0200 Subject: [PATCH 25/45] fix --- CHANGELOG.md | 10 +++++----- nextflow_schema.json | 2 +- subworkflows/local/annotate_snv.nf | 7 ++----- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59e29cf2..e64de5df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ Parameter has been removed 
if new parameter information isn't present. - Added better documentation on subworkflow input [#101](https://github.com/genomic-medicine-sweden/tomte/pull/101) - Added option to add extra arguments to DROP aberrant expression and aberrant splicing [#104](https://github.com/genomic-medicine-sweden/tomte/pull/104) - Added a function to branch references into compressed/uncompressed [#107](https://github.com/genomic-medicine-sweden/tomte/pull/107) -- Added local module create_hgncids_file.nf and nf-core module filter vep to create a clinical vcf [#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) +- Added nf-core modules gawk and filter vep to create a clinical vcf [#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) ### `Fixed` @@ -30,14 +30,14 @@ Parameter has been removed if new parameter information isn't present. - Updated GATK4_ASEREADCOUNTER, now bam and vcf will be given as one channel [#103](https://github.com/genomic-medicine-sweden/tomte/pull/103) - Prepare reference subworkflow has been reformated and simplified [#105](https://github.com/genomic-medicine-sweden/tomte/pull/105) - FastQC have been updated to correctly allocate memory [#106](https://github.com/genomic-medicine-sweden/tomte/pull/106) -- vep_hgnc is now extracted from gene_panel_clinical_filter [#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) +- vep_filters is now extracted from gene_panel_clinical_filter [#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) ### `Parameters` - Removed `--vep_filters`, it will now be automatically extracted from the `--gene_panel_clinical_filter`[#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) - | Old parameter | New parameter | - | --------------------------- | ------------------------- | - | `--vep_filters` | | + | Old parameter | New parameter | + | --------------- | ------------- | + | `--vep_filters` | | - Updated parameter names to make their use easier and more clear, changing the 
names from `switch` to `skip` and their default value from `true` to `false` [#108](https://github.com/genomic-medicine-sweden/tomte/pull/108) diff --git a/nextflow_schema.json b/nextflow_schema.json index 2a0c7fda..d1e4bc18 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -334,7 +334,7 @@ }, "gene_panel_clinical_filter": { "type": "string", - "description": "tsv file containing genes on which results from drop and vep will be filtered to avoid incidental findings, the fourth column must contain hgnc ids", + "description": "tsv file containing genes on which results from drop and vep will be filtered to avoid incidental findings, columns should be chromosome, gene_start, gene_stop, hgnc_id, hgnc_symbol", "fa_icon": "fas fa-file" } } diff --git a/subworkflows/local/annotate_snv.nf b/subworkflows/local/annotate_snv.nf index a65627a6..f7f0dd8e 100644 --- a/subworkflows/local/annotate_snv.nf +++ b/subworkflows/local/annotate_snv.nf @@ -7,7 +7,6 @@ include { RENAME_FILES } from '../../modules/local/rename_files' include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main' include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' include { GAWK } from '../../modules/nf-core/gawk/main' -include { CREATE_HGNCIDS_FILE } from '../../modules/local/create_hgncids_file.nf' include { ENSEMBLVEP_FILTERVEP } from '../../modules/nf-core/ensemblvep/filtervep/main' @@ -48,13 +47,11 @@ workflow ANNOTATE_SNV { TABIX_TABIX( RENAME_FILES.out.output ) ch_vcf_research = RENAME_FILES.out.output.join(TABIX_TABIX.out.tbi) + // Generate Clinical filter GAWK( ch_gene_panel_clinical_filter.map{it -> [[id:'hgnc'], it]}.collect(), [] ) - // Generate Clinical filter - CREATE_HGNCIDS_FILE( GAWK.out.output ) - .txt - .set {ch_hgnc_ids} + ch_hgnc_ids = GAWK.out.output.map{ meta, hgnc_ids -> [ hgnc_ids ] } //Filter results ENSEMBLVEP_FILTERVEP( From 63fda26c13339098b8ff66c5e066f5a45c02f113 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" 
Date: Wed, 17 Apr 2024 15:51:28 +0200 Subject: [PATCH 26/45] prettier --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e64de5df..6865303f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,9 +35,9 @@ Parameter has been removed if new parameter information isn't present. ### `Parameters` - Removed `--vep_filters`, it will now be automatically extracted from the `--gene_panel_clinical_filter`[#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) - | Old parameter | New parameter | + | Old parameter | New parameter | | --------------- | ------------- | - | `--vep_filters` | | + | `--vep_filters` | | - Updated parameter names to make their use easier and more clear, changing the names from `switch` to `skip` and their default value from `true` to `false` [#108](https://github.com/genomic-medicine-sweden/tomte/pull/108) From ed7c44b4cc4911c5b82244db2287b823cee894f7 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Wed, 17 Apr 2024 15:55:36 +0200 Subject: [PATCH 27/45] fix --- modules/local/create_hgncids_file.nf | 61 ---------------------------- 1 file changed, 61 deletions(-) delete mode 100644 modules/local/create_hgncids_file.nf diff --git a/modules/local/create_hgncids_file.nf b/modules/local/create_hgncids_file.nf deleted file mode 100644 index a6e1abdb..00000000 --- a/modules/local/create_hgncids_file.nf +++ /dev/null @@ -1,61 +0,0 @@ -process CREATE_HGNCIDS_FILE { - tag "$meta.id" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - tuple val(meta), path(input) - - output: - path("*_reformatted.txt"), emit: txt - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - python3 < versions.yml - "${task.process}": - create_hgncids_file: v1.0 - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ - - stub: - """ - python3 < versions.yml - "${task.process}": - create_hgncids_file: v1.0 - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} From d371c8303ded72c960deef556843c2c4c49319c0 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Fri, 19 Apr 2024 14:57:25 +0200 Subject: [PATCH 28/45] feat updated modules --- CHANGELOG.md | 1 + modules.json | 10 +- .../nf-core/bcftools/stats/environment.yml | 1 + .../nf-core/bcftools/stats/tests/main.nf.test | 182 +++++++++++++ .../bcftools/stats/tests/main.nf.test.snap | 160 +++++++++++ modules/nf-core/bcftools/stats/tests/tags.yml | 2 + .../nf-core/ensemblvep/vep/environment.yml | 2 +- modules/nf-core/ensemblvep/vep/main.nf | 4 +- .../nf-core/ensemblvep/vep/tests/main.nf.test | 40 ++- .../ensemblvep/vep/tests/main.nf.test.snap | 26 ++ .../ensemblvep/vep/tests/nextflow.config | 11 +- modules/nf-core/fastp/main.nf | 2 +- modules/nf-core/fastp/tests/main.nf.test | 18 +- modules/nf-core/fastp/tests/main.nf.test.snap | 10 +- .../fastp/tests/nextflow.interleaved.config | 5 + ...low.config => nextflow.save_failed.config} | 3 +- .../bedtointervallist/tests/main.nf.test | 38 +++ .../bedtointervallist/tests/main.nf.test.snap | 35 +++ .../gatk4/bedtointervallist/tests/tags.yml | 2 + .../nf-core/samtools/faidx/environment.yml | 4 +- modules/nf-core/samtools/faidx/meta.yml | 4 + .../nf-core/samtools/faidx/tests/main.nf.test | 122 +++++++++ .../samtools/faidx/tests/main.nf.test.snap | 249 ++++++++++++++++++ .../samtools/faidx/tests/nextflow.config | 7 + 
.../samtools/faidx/tests/nextflow2.config | 6 + modules/nf-core/samtools/faidx/tests/tags.yml | 2 + subworkflows/local/annotate_snv.nf | 1 + 27 files changed, 902 insertions(+), 45 deletions(-) create mode 100644 modules/nf-core/bcftools/stats/tests/main.nf.test create mode 100644 modules/nf-core/bcftools/stats/tests/main.nf.test.snap create mode 100644 modules/nf-core/bcftools/stats/tests/tags.yml create mode 100644 modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap create mode 100644 modules/nf-core/fastp/tests/nextflow.interleaved.config rename modules/nf-core/fastp/tests/{nextflow.config => nextflow.save_failed.config} (50%) create mode 100644 modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/bedtointervallist/tests/tags.yml create mode 100644 modules/nf-core/samtools/faidx/tests/main.nf.test create mode 100644 modules/nf-core/samtools/faidx/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/faidx/tests/nextflow.config create mode 100644 modules/nf-core/samtools/faidx/tests/nextflow2.config create mode 100644 modules/nf-core/samtools/faidx/tests/tags.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 6865303f..511e0190 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Parameter has been removed if new parameter information isn't present. 
- Prepare reference subworkflow has been reformated and simplified [#105](https://github.com/genomic-medicine-sweden/tomte/pull/105) - FastQC have been updated to correctly allocate memory [#106](https://github.com/genomic-medicine-sweden/tomte/pull/106) - vep_filters is now extracted from gene_panel_clinical_filter [#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) +- Updated modules bcftools/stats, ensemblvep/vep, fastp, gatk4/bedtointervallist, samtools/faidx ### `Parameters` diff --git a/modules.json b/modules.json index 5c4d40a7..d6efa4a8 100644 --- a/modules.json +++ b/modules.json @@ -23,7 +23,7 @@ }, "bcftools/stats": { "branch": "master", - "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", + "git_sha": "618364f55cb88f6c283f6c6c45c24d5f9f08f998", "installed_by": ["modules"] }, "bcftools/view": { @@ -43,12 +43,12 @@ }, "ensemblvep/vep": { "branch": "master", - "git_sha": "76a0696a60c41c57fc5f6040ac31b11ce5d4d8dd", + "git_sha": "3db4f8488315cd7d7cf3fcb64251f6603210e831", "installed_by": ["modules"] }, "fastp": { "branch": "master", - "git_sha": "003920c7f9a8ae19b69a97171922880220bedf56", + "git_sha": "95cf5fe0194c7bf5cb0e3027a2eb7e7c89385080", "installed_by": ["modules"] }, "fastqc": { @@ -63,7 +63,7 @@ }, "gatk4/bedtointervallist": { "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "git_sha": "d3f215802f696f7993f25c759781d2db91232015", "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { @@ -129,7 +129,7 @@ }, "samtools/faidx": { "branch": "master", - "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "git_sha": "f153f1f10e1083c49935565844cccb7453021682", "installed_by": ["modules"] }, "samtools/index": { diff --git a/modules/nf-core/bcftools/stats/environment.yml b/modules/nf-core/bcftools/stats/environment.yml index 1a969528..7bb40dc0 100644 --- a/modules/nf-core/bcftools/stats/environment.yml +++ b/modules/nf-core/bcftools/stats/environment.yml @@ -5,3 +5,4 @@ channels: - defaults 
dependencies: - bioconda::bcftools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test b/modules/nf-core/bcftools/stats/tests/main.nf.test new file mode 100644 index 00000000..f027f6b1 --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test @@ -0,0 +1,182 @@ +nextflow_process { + + name "Test Process BCFTOOLS_STATS" + script "../main.nf" + process "BCFTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/stats" + + test("sarscov2 - vcf_gz") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("version") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - regions") { + + when { + process { + """ + input[0] = [ [ id:'regions_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)] + input[1] = [ [id:'regions_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("regions_version") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - targets") { + + when { + process { + """ + input[0] = [ [ 
id:'targets_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [id:'targets_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("targets_version") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - exons") { + + when { + process { + """ + input[0] = [ [ id:'exon_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [id: "exon_test"], + file(params.modules_testdata_base_path + 'delete_me/bcftools/stats/exons.tsv.gz', checkIfExists: true) ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("exon_version") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - reference") { + + when { + process { + """ + input[0] = [ [ id:'ref_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [id: 'ref_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("ref_version") }, + { assert 
snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + + test("sarscov2 - vcf_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test.snap b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..30691c32 --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap @@ -0,0 +1,160 @@ +{ + "sarscov2 - vcf_gz - reference": { + "content": [ + [ + "# This file was produced by bcftools stats (1.18+htslib-1.18) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --fasta-ref genome.fasta test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-20T11:46:24.34147" + }, + "sarscov2 - vcf_gz - exons": { + "content": [ + [ + "# This file was produced by bcftools stats (1.18+htslib-1.18) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --exons exons.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-20T11:46:18.378716" + }, + "exon_version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T10:02:02.530551189" + }, + "ref_version": { + "content": 
null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T10:02:06.885381764" + }, + "sarscov2 - vcf_gz - targets": { + "content": [ + [ + "# This file was produced by bcftools stats (1.18+htslib-1.18) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --targets-file test2.targets.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-20T11:46:12.48194" + }, + "targets_version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T10:01:58.412147664" + }, + "sarscov2 - vcf_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,5909d472a49b0aa2bfbbb1094c129e48" + ], + "stats": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5909d472a49b0aa2bfbbb1094c129e48" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T16:26:21.450513562" + }, + "version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T09:57:04.317347424" + }, + "regions_version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T10:01:54.349855366" + }, + "sarscov2 - vcf_gz": { + "content": [ + [ + "# This file was produced by bcftools stats (1.18+htslib-1.18) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + 
"timestamp": "2024-03-20T11:46:01.862297" + }, + "sarscov2 - vcf_gz - regions": { + "content": [ + [ + "# This file was produced by bcftools stats (1.18+htslib-1.18) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --regions-file test3.vcf.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-20T11:46:07.296109" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/stats/tests/tags.yml b/modules/nf-core/bcftools/stats/tests/tags.yml new file mode 100644 index 00000000..53c12d92 --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/stats: + - "modules/nf-core/bcftools/stats/**" diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml index 7a127746..91457c05 100644 --- a/modules/nf-core/ensemblvep/vep/environment.yml +++ b/modules/nf-core/ensemblvep/vep/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::ensembl-vep=110.0 + - bioconda::ensembl-vep=111.0 diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf index a7fc5ad1..885efced 100644 --- a/modules/nf-core/ensemblvep/vep/main.nf +++ b/modules/nf-core/ensemblvep/vep/main.nf @@ -4,8 +4,8 @@ process ENSEMBLVEP_VEP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : - 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + 'https://depot.galaxyproject.org/singularity/ensembl-vep:111.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:111.0--pl5321h2a3209d_0' }" input: tuple val(meta), path(vcf), path(custom_extra_files) diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test index f072dcab..4aff84a3 100644 --- a/modules/nf-core/ensemblvep/vep/tests/main.nf.test +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test @@ -1,26 +1,31 @@ nextflow_process { name "Test Process ENSEMBLVEP_VEP" - script "modules/nf-core/ensemblvep/vep/main.nf" + script "../main.nf" process "ENSEMBLVEP_VEP" config "./nextflow.config" + tag "modules" tag "modules_nfcore" tag "ensemblvep" tag "ensemblvep/vep" tag "ensemblvep/download" - test("test_ensemblvep_vep_fasta_vcf") { - config "./vcf.config" setup { run("ENSEMBLVEP_DOWNLOAD") { script "../../download/main.nf" + process { """ - input[0] = Channel.of([[id:"${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version]) + input[0] = Channel.of([ + [id:"111_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) """ } } @@ -31,7 +36,7 @@ nextflow_process { """ input[0] = Channel.of([ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), [] ]) input[1] = params.vep_genome @@ -40,7 +45,7 @@ nextflow_process { input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } input[5] = Channel.value([ [id:"fasta"], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]) 
input[6] = [] """ @@ -49,23 +54,29 @@ nextflow_process { then { assertAll( - {assert process.success}, - {assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")} + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") } ) } } test("test_ensemblvep_vep_fasta_tab_gz") { - config "./tab.gz.config" setup { run("ENSEMBLVEP_DOWNLOAD") { script "../../download/main.nf" + process { """ - input[0] = Channel.of([[id:"${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version]) + input[0] = Channel.of([ + [id:"111_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) """ } } @@ -76,7 +87,7 @@ nextflow_process { """ input[0] = Channel.of([ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), [] ]) input[1] = params.vep_genome @@ -85,7 +96,7 @@ nextflow_process { input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } input[5] = Channel.value([ [id:"fasta"], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]) input[6] = [] """ @@ -94,8 +105,9 @@ nextflow_process { then { assertAll( - {assert process.success}, - {assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v110.0")} + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v111.0") } ) } } diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap 
b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap new file mode 100644 index 00000000..f937b299 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "test_ensemblvep_vep_fasta_tab_gz": { + "content": [ + [ + "versions.yml:md5,bd2ba1b4741a7d0a224160b50859f4ba" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-04-15T17:35:20.694114" + }, + "test_ensemblvep_vep_fasta_vcf": { + "content": [ + [ + "versions.yml:md5,bd2ba1b4741a7d0a224160b50859f4ba" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-04-15T17:34:41.093843" + } +} \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config index cfaef733..882bce41 100644 --- a/modules/nf-core/ensemblvep/vep/tests/nextflow.config +++ b/modules/nf-core/ensemblvep/vep/tests/nextflow.config @@ -1,13 +1,12 @@ params { - vep_cache_version = "110" - vep_genome = "WBcel235" - vep_species = "caenorhabditis_elegans" + vep_cache_version = "111" + vep_genome = "WBcel235" + vep_species = "caenorhabditis_elegans" } process { - withName: ENSEMBLVEP_DOWNLOAD { - ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + ext.prefix = { "${params.vep_cache_version}_${params.vep_genome}" } } - } diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 2a3b679e..4fc19b74 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -29,7 +29,7 @@ process FASTP { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" - def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? 
"--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' // Added soft-links to original fastqs for consistent naming in MultiQC // Use single ended for interleaved. Add --interleaved_in in config. if ( task.ext.args?.contains('--interleaved_in') ) { diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test index 9b3f9a38..6f1f4897 100644 --- a/modules/nf-core/fastp/tests/main.nf.test +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -251,7 +251,8 @@ nextflow_process { } test("fastp test_fastp_interleaved") { - config './nextflow.config' + + config './nextflow.interleaved.config' when { params { outdir = "$outputDir" @@ -277,7 +278,7 @@ nextflow_process { def html_text = [ "Q20 bases:25.719000 K (93.033098%)", "paired end (151 cycles + 151 cycles)"] def log_text = [ "Q20 bases: 12922(92.9841%)", - "reads passed filter: 198"] + "reads passed filter: 162"] def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) } + { assert path(process.out.reads_fail.get(0).get(1).get(2)).linesGzip.contains(failed_read2_line) } } }, { html_text.each { html_part -> diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap index b4c0e1dd..3e876288 100644 --- a/modules/nf-core/fastp/tests/main.nf.test.snap +++ 
b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -7,7 +7,7 @@ "id": "test", "single_end": true }, - "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4" + "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22" ] ] ], @@ -15,7 +15,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-01-17T18:08:06.123035" + "timestamp": "2024-03-18T16:19:15.063001" }, "test_fastp_paired_end_merged-for_stub_match": { "content": [ @@ -65,7 +65,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-01-17T18:06:00.223817" + "timestamp": "2024-03-18T16:18:43.526412" }, "versions_paired_end": { "content": [ @@ -112,7 +112,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-01T12:03:37.827323085" + "timestamp": "2024-03-18T16:19:15.111894" }, "test_fastp_paired_end_merged_match": { "content": [ @@ -283,7 +283,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-01T11:57:30.791982648" + "timestamp": "2024-03-18T16:18:43.580336" }, "versions_paired_end_merged_adapterlist": { "content": [ diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config new file mode 100644 index 00000000..4be8dbd2 --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.interleaved.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "--interleaved_in -e 30" + } +} diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config similarity index 50% rename from modules/nf-core/fastp/tests/nextflow.config rename to modules/nf-core/fastp/tests/nextflow.save_failed.config index 0f7849ad..53b61b0c 100644 --- a/modules/nf-core/fastp/tests/nextflow.config +++ b/modules/nf-core/fastp/tests/nextflow.save_failed.config @@ -1,6 +1,5 @@ process { - withName: FASTP { - ext.args = "--interleaved_in" + ext.args = "-e 30" } } diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test 
b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test new file mode 100644 index 00000000..2289f73f --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process GATK4_BEDTOINTERVALLIST" + script "../main.nf" + process "GATK4_BEDTOINTERVALLIST" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/bedtointervallist" + + test("test_gatk4_bedtointervallist") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + input[1] = [ [ id:'dict' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap new file mode 100644 index 00000000..48c322fd --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_gatk4_bedtointervallist": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "1": [ + "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b" + ], + "interval_list": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "versions": [ + "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T14:20:12.168775" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml new file mode 100644 index 00000000..b4d54f12 --- 
/dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/bedtointervallist: + - "modules/nf-core/gatk4/bedtointervallist/**" diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml index 3e95dd71..9c24eb0a 100644 --- a/modules/nf-core/samtools/faidx/environment.yml +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -1,8 +1,10 @@ name: samtools_faidx + channels: - conda-forge - bioconda - defaults + dependencies: - - bioconda::samtools=1.19.2 - bioconda::htslib=1.19.1 + - bioconda::samtools=1.19.2 diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index e189af28..f3c25de2 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -39,6 +39,10 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] + - fa: + type: file + description: FASTA file + pattern: "*.{fa}" - fai: type: file description: FASTA index file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 00000000..17244ef2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + + test("test_samtools_faidx") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + config "./nextflow2.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 00000000..3e651ef6 --- /dev/null +++ 
b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,249 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T16:22:39.412601" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "versions": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T16:23:22.427966" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + 
"gzi": [ + + ], + "versions": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T16:24:04.107537" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T16:24:45.868463" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T16:25:27.550554" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 00000000..f76a3ba0 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = 'MT192765.1 -o extract.fa' + } + +} diff --git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config new file mode 
100644 index 00000000..33ebbd5d --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow2.config @@ -0,0 +1,6 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = '-o extract.fa' + } +} diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml new file mode 100644 index 00000000..e4a83948 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/faidx: + - modules/nf-core/samtools/faidx/** diff --git a/subworkflows/local/annotate_snv.nf b/subworkflows/local/annotate_snv.nf index f7f0dd8e..4b5c1b3f 100644 --- a/subworkflows/local/annotate_snv.nf +++ b/subworkflows/local/annotate_snv.nf @@ -65,6 +65,7 @@ workflow ANNOTATE_SNV { ch_vcf_clin = TABIX_BGZIPTABIX.out.gz_tbi ch_versions = ch_versions.mix( ENSEMBLVEP_VEP.out.versions.first() ) + ch_versions = ch_versions.mix( RENAME_FILES.out.versions ) ch_versions = ch_versions.mix( GAWK.out.versions ) ch_versions = ch_versions.mix( ENSEMBLVEP_FILTERVEP.out.versions ) ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) From 8cfba718122c9d0d04a42074153e826edcf7ccbd Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Fri, 19 Apr 2024 15:07:12 +0200 Subject: [PATCH 29/45] fix CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 511e0190..e4806daa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,7 @@ Parameter has been removed if new parameter information isn't present. 
- Prepare reference subworkflow has been reformated and simplified [#105](https://github.com/genomic-medicine-sweden/tomte/pull/105) - FastQC have been updated to correctly allocate memory [#106](https://github.com/genomic-medicine-sweden/tomte/pull/106) - vep_filters is now extracted from gene_panel_clinical_filter [#109](https://github.com/genomic-medicine-sweden/tomte/pull/109) -- Updated modules bcftools/stats, ensemblvep/vep, fastp, gatk4/bedtointervallist, samtools/faidx +- Updated modules bcftools/stats, ensemblvep/vep, fastp, gatk4/bedtointervallist, samtools/faidx [#110](https://github.com/genomic-medicine-sweden/tomte/pull/110) ### `Parameters` From 229a7e692f12e47ed578746cec44205601563b42 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Fri, 19 Apr 2024 16:22:36 +0200 Subject: [PATCH 30/45] feat prepare for release --- CHANGELOG.md | 10 ++++++++++ assets/multiqc_config.yml | 2 +- nextflow.config | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4806daa..59c02fd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,16 @@ Parameter has been added if just the new parameter information is present. Parameter has been removed if new parameter information isn't present. ::: +## 1.1.2 - [XXXX-XX-XX] + +### `Added` + +### `Fixed` + +### `Parameters` + +## 1.1.2 - Santa [2024-04-19] + ### `Added` - Added automatic tests to test the pipeline with all switches set to false [#100](https://github.com/genomic-medicine-sweden/tomte/pull/100) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 7aa5892b..446b4830 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,5 +1,5 @@ report_comment: > - This report has been generated by the genomic-medicine-sweden/tomte + This report has been generated by the genomic-medicine-sweden/tomte analysis pipeline. For information about how to interpret these results, please see the documentation. 
report_section_order: diff --git a/nextflow.config b/nextflow.config index 4d462a27..9d61e666 100644 --- a/nextflow.config +++ b/nextflow.config @@ -270,7 +270,7 @@ manifest { description = """Pipeline to analyse RNAseq from raredisease patients""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.0' + version = '1.1.1' doi = '10.5281/zenodo.10828946' } From fc91749e84bb631179a5653db3836859c633d58f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Fri, 19 Apr 2024 16:34:00 +0200 Subject: [PATCH 31/45] Update CHANGELOG.md Co-authored-by: Anders Jemt --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59c02fd2..a66b6a27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ Parameter has been removed if new parameter information isn't present. ### `Parameters` -## 1.1.2 - Santa [2024-04-19] +## 1.1.1 - Santa [2024-04-19] ### `Added` From 967a87b53fc2fba2757c16c8e654e5ed55d5352f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Fri, 19 Apr 2024 16:35:49 +0200 Subject: [PATCH 32/45] Apply suggestions from code review --- CHANGELOG.md | 4 ++-- assets/multiqc_config.yml | 2 +- nextflow.config | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a66b6a27..f0992282 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ Parameter has been added if just the new parameter information is present. Parameter has been removed if new parameter information isn't present. ::: -## 1.1.2 - [XXXX-XX-XX] +## 1.2.1 - [XXXX-XX-XX] ### `Added` @@ -22,7 +22,7 @@ Parameter has been removed if new parameter information isn't present. 
### `Parameters` -## 1.1.1 - Santa [2024-04-19] +## 1.2.0 - Santa [2024-04-19] ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 446b4830..4d1ab8a6 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,5 +1,5 @@ report_comment: > - This report has been generated by the genomic-medicine-sweden/tomte + This report has been generated by the genomic-medicine-sweden/tomte analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: diff --git a/nextflow.config b/nextflow.config index 9d61e666..e3366d83 100644 --- a/nextflow.config +++ b/nextflow.config @@ -270,7 +270,7 @@ manifest { description = """Pipeline to analyse RNAseq from raredisease patients""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.1' + version = '1.2.0' doi = '10.5281/zenodo.10828946' } From 8a6bd414abf6f3e3f494d2e67b213ecd5c18e51f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Mon, 22 Apr 2024 08:24:11 +0200 Subject: [PATCH 33/45] Apply suggestions from code review update gawk module --- modules.json | 2 +- modules/nf-core/gawk/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index d6efa4a8..401809cf 100644 --- a/modules.json +++ b/modules.json @@ -88,7 +88,7 @@ }, "gawk": { "branch": "master", - "git_sha": "da4d05d04e65227d4307e87940842f1a14de62c7", + "git_sha": "e8dd194205c8657440afc34695e3bb2d32840ce0", "installed_by": ["modules"] }, "gffcompare": { diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf index f856a1f8..449b9686 100644 --- a/modules/nf-core/gawk/main.nf +++ b/modules/nf-core/gawk/main.nf @@ -41,7 +41,7 @@ process GAWK { stub: prefix = task.ext.prefix ?: "${meta.id}" - suffix = task.ext.suffix ?: "${input.getExtension}" + suffix = task.ext.suffix ?: "${input.getExtension()}" """ touch ${prefix}.${suffix} From 
6dcaf55de6090389cebe93fda58e3b25e1d6b3bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Mon, 22 Apr 2024 15:59:02 +0200 Subject: [PATCH 34/45] update salmon --- modules.json | 4 ++-- modules/nf-core/salmon/index/main.nf | 25 ++++++++++++++++++++++++ modules/nf-core/salmon/quant/main.nf | 29 ++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index 401809cf..f41b0cae 100644 --- a/modules.json +++ b/modules.json @@ -118,12 +118,12 @@ }, "salmon/index": { "branch": "master", - "git_sha": "ffc101e1b84ef3df2e4e4a966e84b3c513ae5693", + "git_sha": "87e89b225e856a9071d66f248f04bf9f56bf4107", "installed_by": ["modules"] }, "salmon/quant": { "branch": "master", - "git_sha": "03a8562231d575c313266c193a980594b941e3ea", + "git_sha": "87e89b225e856a9071d66f248f04bf9f56bf4107", "installed_by": ["modules"], "patch": "modules/nf-core/salmon/quant/salmon-quant.diff" }, diff --git a/modules/nf-core/salmon/index/main.nf b/modules/nf-core/salmon/index/main.nf index 88d9cf14..e755d9a3 100644 --- a/modules/nf-core/salmon/index/main.nf +++ b/modules/nf-core/salmon/index/main.nf @@ -44,4 +44,29 @@ process SALMON_INDEX { salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") END_VERSIONS """ + + stub: + """ + mkdir salmon + touch salmon/complete_ref_lens.bin + touch salmon/ctable.bin + touch salmon/ctg_offsets.bin + touch salmon/duplicate_clusters.tsv + touch salmon/info.json + touch salmon/mphf.bin + touch salmon/pos.bin + touch salmon/pre_indexing.log + touch salmon/rank.bin + touch salmon/refAccumLengths.bin + touch salmon/ref_indexing.log + touch salmon/reflengths.bin + touch salmon/refseq.bin + touch salmon/seq.bin + touch salmon/versionInfo.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ } diff --git a/modules/nf-core/salmon/quant/main.nf 
b/modules/nf-core/salmon/quant/main.nf index acad0cef..93ff610a 100644 --- a/modules/nf-core/salmon/quant/main.nf +++ b/modules/nf-core/salmon/quant/main.nf @@ -78,4 +78,33 @@ process SALMON_QUANT { salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + mkdir ${prefix}/aux_info + touch ${prefix}/aux_info/ambig_info.tsv + touch ${prefix}/aux_info/expected_bias.gz + touch ${prefix}/aux_info/exp_gc.gz + touch ${prefix}/aux_info/fld.gz + touch ${prefix}/aux_info/meta_info.json + touch ${prefix}/aux_info/observed_bias_3p.gz + touch ${prefix}/aux_info/observed_bias.gz + touch ${prefix}/aux_info/obs_gc.gz + touch ${prefix}/cmd_info.json + touch ${prefix}/lib_format_counts.json + mkdir ${prefix}/libParams + touch ${prefix}/libParams/flenDist.txt + mkdir ${prefix}/logs + touch ${prefix}/logs/salmon_quant.log + mkdir ${prefix}/quant.genes.sf + mkdir ${prefix}/quant.sf + touch ${prefix}_meta_info.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ } From 045ea6c1a97ac8549c2c4f689ebd7273f672222e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Mon, 22 Apr 2024 19:27:07 +0200 Subject: [PATCH 35/45] mpileup --- modules.json | 2 +- modules/nf-core/bcftools/mpileup/main.nf | 14 + .../bcftools/mpileup/tests/main.nf.test | 134 +++++-- .../bcftools/mpileup/tests/main.nf.test.snap | 340 +++++++++++++----- 4 files changed, 379 insertions(+), 111 deletions(-) diff --git a/modules.json b/modules.json index f41b0cae..649936c2 100644 --- a/modules.json +++ b/modules.json @@ -17,7 +17,7 @@ }, "bcftools/mpileup": { "branch": "master", - "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", + "git_sha": "e7df38a545d7d72083eededabd8849f731a01502", "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff" }, diff --git 
a/modules/nf-core/bcftools/mpileup/main.nf b/modules/nf-core/bcftools/mpileup/main.nf index ed138600..61659b41 100644 --- a/modules/nf-core/bcftools/mpileup/main.nf +++ b/modules/nf-core/bcftools/mpileup/main.nf @@ -55,4 +55,18 @@ process BCFTOOLS_MPILEUP { bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bcftools_stats.txt + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.mpileup.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/bcftools/mpileup/tests/main.nf.test b/modules/nf-core/bcftools/mpileup/tests/main.nf.test index 6478bbc2..dc35c542 100644 --- a/modules/nf-core/bcftools/mpileup/tests/main.nf.test +++ b/modules/nf-core/bcftools/mpileup/tests/main.nf.test @@ -33,13 +33,43 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - process.out.vcf, - process.out.tbi, - process.out.stats, - process.out.mpileup, - process.out.versions - ).match() } + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_fasta_false.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_fasta_false.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_fasta_false.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_fasta_false_versions") } + ) + } + + } + + test("sarscov2 - [bam, []], fasta, false stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + 
input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_fasta_false_stub.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_fasta_false_stub.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_fasta_false_stub.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_fasta_false_stub_versions") } ) } @@ -67,13 +97,45 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - process.out.vcf, - process.out.tbi, - process.out.stats, - process.out.mpileup, - process.out.versions - ).match() } + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_true_stub.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_true_stub.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_true_stub.bcftools_stats.txt") }, + { assert snapshot(file(process.out.mpileup[0][1]).name).match("bam_bed_fasta_true_stub.mpileup.gz") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_true_stub_versions") } + ) + } + + } + + test("sarscov2 - [bam, []], fasta, true stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_true.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_true.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_true.bcftools_stats.txt") }, + { assert 
snapshot(file(process.out.mpileup[0][1]).name).match("bam_bed_fasta_true.mpileup.gz") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_true_versions") } ) } @@ -101,13 +163,43 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - process.out.vcf, - process.out.tbi, - process.out.stats, - process.out.mpileup, - process.out.versions - ).match() } + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_false.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_false.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_false.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_false_versions") } + ) + } + + } + + test("sarscov2 - [bam, bed], fasta, false stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_false_stub.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_false_stub.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_false_stub.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_false_stub_versions") } ) } diff --git a/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap b/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap index ef80ab1b..8b8d5eab 100644 --- a/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap +++ 
b/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap @@ -1,112 +1,274 @@ { - "sarscov2 - [bam, []], fasta, true": { + "bam_bed_fasta_true.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.172966" + }, + "bam_bed_fasta_false_stub.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:57.815085" + }, + "bam_fasta_false_stub.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:15.731277" + }, + "bam_bed_fasta_false_stub.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:57.844573" + }, + "bam_bed_fasta_true_stub.mpileup.gz": { + "content": [ + "test.mpileup.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.462382" + }, + "bam_bed_fasta_true.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.146525" + }, + "bam_bed_fasta_true_stub.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.42754" + }, + "bam_fasta_false_versions": { "content": [ - [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,0f2f2c8488e97e7f13979380d5d3b6b5" - ] - ], - [ - [ - { - "id": "test" - }, - "test.vcf.gz.tbi:md5,34cb2eeb73f4d2b98218acecebd92704" - ] - ], - [ - [ - { - "id": "test" - }, - "test.bcftools_stats.txt:md5,a988fbcd2ea5d1ce30970dcb60a77ed7" - ] - ], - [ - [ - { - "id": "test" - }, - "test.mpileup.gz:md5,73b4a00398bddab2cd065b40d17ca4dc" - ] - ], [ "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" ] ], - "timestamp": 
"2023-11-29T14:11:54.549517279" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:10.137483" }, - "sarscov2 - [bam, bed], fasta, false": { + "bam_fasta_false_stub.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:15.746204" + }, + "bam_bed_fasta_false_versions": { "content": [ - [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,687244dbf71d05b3b973ab08ecf05310" - ] - ], - [ - [ - { - "id": "test" - }, - "test.vcf.gz.tbi:md5,3785df15f3d7faf35f3ad70d167a50f7" - ] - ], - [ - [ - { - "id": "test" - }, - "test.bcftools_stats.txt:md5,f8c5ab149c4bf0e5f51c518346cb87b5" - ] - ], - [ - - ], [ "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" ] ], - "timestamp": "2023-11-29T14:12:00.865439661" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:51.772936" }, - "sarscov2 - [bam, []], fasta, false": { + "bam_bed_fasta_false.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:51.72798" + }, + "bam_bed_fasta_true_versions": { "content": [ [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,0f2f2c8488e97e7f13979380d5d3b6b5" - ] - ], - [ - [ - { - "id": "test" - }, - "test.vcf.gz.tbi:md5,34cb2eeb73f4d2b98218acecebd92704" - ] - ], + "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.20135" + }, + "bam_fasta_false.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:10.051991" + }, + "bam_fasta_false.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:10.123726" + }, + 
"bam_bed_fasta_false.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:51.761517" + }, + "bam_bed_fasta_false_stub.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:57.832271" + }, + "bam_bed_fasta_false.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:51.748389" + }, + "bam_fasta_false_stub.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:15.709802" + }, + "bam_bed_fasta_true_stub.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.453121" + }, + "bam_fasta_false.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:10.108027" + }, + "bam_fasta_false_stub_versions": { + "content": [ [ - [ - { - "id": "test" - }, - "test.bcftools_stats.txt:md5,a988fbcd2ea5d1ce30970dcb60a77ed7" - ] - ], + "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:15.770612" + }, + "bam_bed_fasta_true.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.18304" + }, + "bam_bed_fasta_true_stub.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.442077" + }, + "bam_bed_fasta_false_stub_versions": { + "content": [ [ - - ], + 
"versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:57.868309" + }, + "bam_bed_fasta_true.mpileup.gz": { + "content": [ + "test.mpileup.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.192888" + }, + "bam_bed_fasta_true_stub_versions": { + "content": [ [ "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" ] ], - "timestamp": "2023-11-29T14:11:47.814900494" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.470988" } } \ No newline at end of file From a546bc284c9357578944a1760efd254bc63d1c63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 24 Apr 2024 14:12:38 +0200 Subject: [PATCH 36/45] Apply suggestions from code review Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> --- CHANGELOG.md | 18 +++++++++--------- assets/multiqc_config.yml | 2 +- nextflow.config | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0992282..5bddc6ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,13 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Old parameter | New parameter | | ------------- | ------------- | -:::note -Parameter has been updated if both old and new parameter information is present. -Parameter has been added if just the new parameter information is present. -Parameter has been removed if new parameter information isn't present. -::: +> [!NOTE] +> Parameter has been updated if both old and new parameter information is present. +> Parameter has been added if just the new parameter information is present. +> Parameter has been removed if new parameter information isn't present. 
-## 1.2.1 - [XXXX-XX-XX] + +## 2.0.1 - [XXXX-XX-XX] ### `Added` @@ -22,7 +22,7 @@ Parameter has been removed if new parameter information isn't present. ### `Parameters` -## 1.2.0 - Santa [2024-04-19] +## 2.0.0 - Santa [2024-04-19] ### `Added` @@ -62,8 +62,8 @@ Parameter has been removed if new parameter information isn't present. | `--switch_drop_ae` | `--skip_drop_ae` | | `--switch_drop_as` | `--skip_drop_as` | -:::note Parameter has been updated if both old and new parameter information is present. Parameter has been added if just the new parameter information is present. Parameter has been removed if new parameter information isn't present. ::: - +> [!NOTE] +> Parameter has been updated if both old and new parameter information is present. Parameter has been added if just the new parameter information is present. Parameter has been removed if new parameter information isn't present. ## 1.1.0 - Rudolph [2024-03-11] Release of genomic-medicine-sweden/tomte, created with the [nf-core](https://nf-co.re/) template. diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 4d1ab8a6..b306e44f 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,5 +1,5 @@ report_comment: > - This report has been generated by the genomic-medicine-sweden/tomte + This report has been generated by the genomic-medicine-sweden/tomte analysis pipeline. For information about how to interpret these results, please see the documentation. 
report_section_order: diff --git a/nextflow.config b/nextflow.config index e3366d83..7d348231 100644 --- a/nextflow.config +++ b/nextflow.config @@ -270,7 +270,7 @@ manifest { description = """Pipeline to analyse RNAseq from raredisease patients""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.2.0' + version = '2.0.0' doi = '10.5281/zenodo.10828946' } From 1c390171030f39f25a44c21fc5f86e814bf4b72c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 24 Apr 2024 14:15:12 +0200 Subject: [PATCH 37/45] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bddc6ce..6090c56d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Old parameter | New parameter | | ------------- | ------------- | -> [!NOTE] +> [!NOTE] > Parameter has been updated if both old and new parameter information is present. > Parameter has been added if just the new parameter information is present. > Parameter has been removed if new parameter information isn't present. From 75e1d038306b2aa55f83558da09463ba285b9e04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 24 Apr 2024 14:18:10 +0200 Subject: [PATCH 38/45] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6090c56d..c0c4cc54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,7 +62,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | `--switch_drop_ae` | `--skip_drop_ae` | | `--switch_drop_as` | `--skip_drop_as` | -> [!NOTE] +> [!NOTE] > Parameter has been updated if both old and new parameter information is present. Parameter has been added if just the new parameter information is present. 
Parameter has been removed if new parameter information isn't present. ## 1.1.0 - Rudolph [2024-03-11] From 5c6294feb7cc5e24b7af62305677326e1e8c8f0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 24 Apr 2024 14:20:56 +0200 Subject: [PATCH 39/45] Apply suggestions from code review --- CHANGELOG.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0c4cc54..3c803787 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,10 +8,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Old parameter | New parameter | | ------------- | ------------- | -> [!NOTE] -> Parameter has been updated if both old and new parameter information is present. -> Parameter has been added if just the new parameter information is present. -> Parameter has been removed if new parameter information isn't present. +:::note +Parameter has been updated if both old and new parameter information is present. +Parameter has been added if just the new parameter information is present. +Parameter has been removed if new parameter information isn't present. +::: ## 2.0.1 - [XXXX-XX-XX] @@ -62,8 +63,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | `--switch_drop_ae` | `--skip_drop_ae` | | `--switch_drop_as` | `--skip_drop_as` | -> [!NOTE] -> Parameter has been updated if both old and new parameter information is present. Parameter has been added if just the new parameter information is present. Parameter has been removed if new parameter information isn't present. +:::note Parameter has been updated if both old and new parameter information is present. Parameter has been added if just the new parameter information is present. Parameter has been removed if new parameter information isn't present. ::: ## 1.1.0 - Rudolph [2024-03-11] Release of genomic-medicine-sweden/tomte, created with the [nf-core](https://nf-co.re/) template. 
From dc2d7b2cc463982ab90d502a5ff58adb9645207c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 24 Apr 2024 14:22:20 +0200 Subject: [PATCH 40/45] CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c803787..c79f1b23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,6 @@ Parameter has been added if just the new parameter information is present. Parameter has been removed if new parameter information isn't present. ::: - ## 2.0.1 - [XXXX-XX-XX] ### `Added` @@ -64,6 +63,7 @@ Parameter has been removed if new parameter information isn't present. | `--switch_drop_as` | `--skip_drop_as` | :::note Parameter has been updated if both old and new parameter information is present. Parameter has been added if just the new parameter information is present. Parameter has been removed if new parameter information isn't present. ::: + ## 1.1.0 - Rudolph [2024-03-11] Release of genomic-medicine-sweden/tomte, created with the [nf-core](https://nf-co.re/) template. 
From 4364ee3ca5ec56ecae85b4410806ac16e1bacda4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 24 Apr 2024 16:56:30 +0200 Subject: [PATCH 41/45] fix stub salmon --- modules.json | 4 +- .../nf-core/salmon/index/tests/main.nf.test | 24 ++++ .../salmon/index/tests/main.nf.test.snap | 12 ++ modules/nf-core/salmon/quant/main.nf | 19 +-- .../nf-core/salmon/quant/tests/main.nf.test | 136 ++++++++++++++++++ .../salmon/quant/tests/main.nf.test.snap | 75 ++++++++++ 6 files changed, 250 insertions(+), 20 deletions(-) diff --git a/modules.json b/modules.json index 649936c2..699da120 100644 --- a/modules.json +++ b/modules.json @@ -118,12 +118,12 @@ }, "salmon/index": { "branch": "master", - "git_sha": "87e89b225e856a9071d66f248f04bf9f56bf4107", + "git_sha": "cb6b2b94fc40dea58f0b1e3dd095f3dd24f2ac8a", "installed_by": ["modules"] }, "salmon/quant": { "branch": "master", - "git_sha": "87e89b225e856a9071d66f248f04bf9f56bf4107", + "git_sha": "cb6b2b94fc40dea58f0b1e3dd095f3dd24f2ac8a", "installed_by": ["modules"], "patch": "modules/nf-core/salmon/quant/salmon-quant.diff" }, diff --git a/modules/nf-core/salmon/index/tests/main.nf.test b/modules/nf-core/salmon/index/tests/main.nf.test index 538b231b..16b3c1a7 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test +++ b/modules/nf-core/salmon/index/tests/main.nf.test @@ -32,4 +32,28 @@ nextflow_process { } + test("sarscov2 stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true)]) + input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.index.get(0)).exists() }, + { assert snapshot(process.out.versions).match("versions stub") } + ) + } + + } + } \ No 
newline at end of file diff --git a/modules/nf-core/salmon/index/tests/main.nf.test.snap b/modules/nf-core/salmon/index/tests/main.nf.test.snap index 94edf390..703e455c 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test.snap +++ b/modules/nf-core/salmon/index/tests/main.nf.test.snap @@ -10,5 +10,17 @@ "nextflow": "23.10.1" }, "timestamp": "2023-11-22T14:26:33.32036" + }, + "versions stub": { + "content": [ + [ + "versions.yml:md5,563eeafb4577be0b13801d7021c0bf42" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T09:47:58.828124" } } \ No newline at end of file diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf index 93ff610a..653cb240 100644 --- a/modules/nf-core/salmon/quant/main.nf +++ b/modules/nf-core/salmon/quant/main.nf @@ -80,26 +80,9 @@ process SALMON_QUANT { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ mkdir ${prefix} - mkdir ${prefix}/aux_info - touch ${prefix}/aux_info/ambig_info.tsv - touch ${prefix}/aux_info/expected_bias.gz - touch ${prefix}/aux_info/exp_gc.gz - touch ${prefix}/aux_info/fld.gz - touch ${prefix}/aux_info/meta_info.json - touch ${prefix}/aux_info/observed_bias_3p.gz - touch ${prefix}/aux_info/observed_bias.gz - touch ${prefix}/aux_info/obs_gc.gz - touch ${prefix}/cmd_info.json - touch ${prefix}/lib_format_counts.json - mkdir ${prefix}/libParams - touch ${prefix}/libParams/flenDist.txt - mkdir ${prefix}/logs - touch ${prefix}/logs/salmon_quant.log - mkdir ${prefix}/quant.genes.sf - mkdir ${prefix}/quant.sf touch ${prefix}_meta_info.json cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/salmon/quant/tests/main.nf.test b/modules/nf-core/salmon/quant/tests/main.nf.test index 04e61e2b..b387fac2 100644 --- a/modules/nf-core/salmon/quant/tests/main.nf.test +++ b/modules/nf-core/salmon/quant/tests/main.nf.test @@ -54,6 +54,38 @@ nextflow_process { } + test("sarscov2 - 
single_end stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("versions_single_end_stub") } + ) + } + + } + test("sarscov2 - single_end lib type A") { when { @@ -86,6 +118,38 @@ nextflow_process { } + test("sarscov2 - single_end lib type A stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = 'A' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("versions_single_end_lib_type_a_stub") } + ) + } + + } + 
test("sarscov2 - pair_end") { when { @@ -121,6 +185,41 @@ nextflow_process { } + test("sarscov2 - pair_end stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("versions_pair_end stub") } + ) + } + + } + test("sarscov2 - pair_end multiple") { when { @@ -157,4 +256,41 @@ nextflow_process { } } + + test("sarscov2 - pair_end multiple stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = 
Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("versions_pair_end_multiple_stub") } + ) + } + + } } diff --git a/modules/nf-core/salmon/quant/tests/main.nf.test.snap b/modules/nf-core/salmon/quant/tests/main.nf.test.snap index a1ec792e..a80bc8aa 100644 --- a/modules/nf-core/salmon/quant/tests/main.nf.test.snap +++ b/modules/nf-core/salmon/quant/tests/main.nf.test.snap @@ -1,4 +1,43 @@ { + "versions_single_end_lib_type_a_stub": { + "content": [ + [ + "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-24T14:34:21.647863" + }, + "versions_pair_end_multiple_stub": { + "content": [ + [ + "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-24T14:34:48.03415" + }, + "sarscov2 - single_end stub": { + "content": [ + [ + + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T09:34:16.444372" + }, "versions_single_end": { "content": [ [ @@ -23,6 +62,42 @@ }, "timestamp": "2024-02-06T17:10:56.121713" }, + "versions_pair_end_stub": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T08:38:01.276656" + }, + "versions_pair_end stub": { + "content": [ + [ + "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-04-24T14:34:36.570127" + }, + "versions_single_end_stub": { + "content": [ + [ + "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-24T14:28:01.864343" + }, "versions_single_end_lib_type_a": { "content": [ [ From 8a17d23cc04d1a406f7a8c33736f41b2873766fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 24 Apr 2024 17:15:33 +0200 Subject: [PATCH 42/45] clean disk space --- .github/workflows/download_pipeline.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 08622fd5..3c7ee491 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -30,6 +30,9 @@ jobs: - name: Install Nextflow uses: nf-core/setup-nextflow@v1 + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.11" From a5214df7f907ec3a38dbbc86f98630d4e7a22cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 24 Apr 2024 17:30:13 +0200 Subject: [PATCH 43/45] skip drop in download pipeline --- .github/workflows/download_pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 3c7ee491..5d4f1650 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -72,4 +72,4 @@ jobs: env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub 
-profile test,singularity --outdir ./results --skip_drop_ae true --skip_drop_as true From 13deb6de05937743ca32d80ac6887ab08e3ea787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 24 Apr 2024 17:44:35 +0200 Subject: [PATCH 44/45] skip drop in download pipeline --- .github/workflows/download_pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 5d4f1650..0a2a04ee 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -72,4 +72,4 @@ jobs: env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results --skip_drop_ae true --skip_drop_as true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) --skip_drop_ae true --skip_drop_as true -stub -profile test,singularity --outdir ./results From 179bcc2e765a67e2ae6ff93967c9258f9c489805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Wed, 24 Apr 2024 18:03:53 +0200 Subject: [PATCH 45/45] skip drop in download pipeline changing test.config --- .github/workflows/download_pipeline.yml | 2 +- conf/test.config | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 0a2a04ee..3c7ee491 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -72,4 +72,4 @@ jobs: env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) --skip_drop_ae true --skip_drop_as true -stub -profile test,singularity --outdir ./results + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE 
}}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/conf/test.config b/conf/test.config index 0dee458a..0f713a86 100644 --- a/conf/test.config +++ b/conf/test.config @@ -19,6 +19,9 @@ params { max_memory = '6.GB' max_time = '6.h' + // Skip when GITHUB actions + skip_drop_ae = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true + skip_drop_as = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // Input data input = "${projectDir}/test_data/samplesheet_chr21.csv"