diff --git a/modules/nf-core/mirdeep2/mapper/environment.yml b/modules/nf-core/mirdeep2/mapper/environment.yml
new file mode 100644
index 00000000000..fafc6663255
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mapper/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::mirdeep2=2.0.1.2"
diff --git a/modules/nf-core/mirdeep2/mapper/main.nf b/modules/nf-core/mirdeep2/mapper/main.nf
new file mode 100644
index 00000000000..d52820a362b
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mapper/main.nf
@@ -0,0 +1,56 @@
+process MIRDEEP2_MAPPER {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0':
+        'biocontainers/mirdeep2:2.0.1.2--0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    tuple val(meta2), path(index, stageAs: '*')
+
+    output:
+    tuple val(meta), path('*.fa'), path('*.arf'), emit: outputs
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // WARN: the version is hard-coded here rather than queried from the tool; update it when bumping the container.
+    // NOTE(review): the conda/container pin is 2.0.1.2 while '2.0.1' is reported - confirm which is intended.
+    def VERSION = '2.0.1'
+
+    // The index basename handed to `-p` is assumed to be `meta2.id` inside the staged index directory
+    """
+    mapper.pl \\
+        ${reads} \\
+        $args \\
+        -p ${index}/${meta2.id} \\
+        -s ${prefix}_collapsed.fa \\
+        -t ${prefix}_reads_collapsed_vs_${meta2.id}_genome.arf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mirdeep2: \$(echo "$VERSION")
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '2.0.1'
+    // Mirror the file names written by the script block
+    """
+    touch ${prefix}_collapsed.fa
+    touch ${prefix}_reads_collapsed_vs_${meta2.id}_genome.arf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mirdeep2: \$(echo "$VERSION")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/mirdeep2/mapper/meta.yml b/modules/nf-core/mirdeep2/mapper/meta.yml
new file mode 100644
index 00000000000..5844344cf97
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mapper/meta.yml
@@ -0,0 +1,59 @@
+name: "mirdeep2_mapper"
+description: |
+  miRDeep2 Mapper is a tool that prepares deep sequencing reads for downstream miRNA detection by collapsing reads, mapping them to a genome, and outputting the required files for miRNA discovery.
+keywords:
+  - mirdeep2
+  - mapper
+  - RNA sequencing
+tools:
+  - "mirdeep2":
+      description: |
+        miRDeep2 Mapper (`mapper.pl`) is part of the miRDeep2 suite. It collapses identical reads, maps them to a reference genome, and outputs both collapsed FASTA and ARF files for downstream miRNA detection and analysis.
+      homepage: "https://www.mdc-berlin.de/content/mirdeep2-documentation"
+      documentation: "https://www.mdc-berlin.de/content/mirdeep2-documentation"
+      tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2"
+      doi: "10.1093/nar/gkn491"
+      licence: ["GPL V3"]
+      identifier: biotools:mirdeep2
+
+input:
+  - - meta:
+        type: map
+        description: Groovy Map containing sample information, e.g. `[ id:'sample1',
+          single_end:false ]`
+    - reads:
+        type: file
+        description: File containing the raw sequencing reads that need to be collapsed
+          and mapped to a reference genome.
+        pattern: "*.fa"
+  - - meta2:
+        type: map
+        description: Groovy Map containing information about the genome index.
+    - index:
+        type: directory
+        description: Directory containing the Bowtie genome index. The index basename
+          inside it must equal `meta2.id`, as `mapper.pl -p` is given `index/meta2.id`.
+        pattern: "*"
+output:
+  - outputs:
+      - meta:
+          type: map
+          description: Groovy Map containing sample information, e.g. `[ id:'sample1',
+            single_end:false ]`
+      - "*.fa":
+          type: file
+          description: FASTA file of processed reads written by `mapper.pl -s`.
+          pattern: "*.fa"
+      - "*.arf":
+          type: file
+          description: ARF file of read-to-genome mappings written by `mapper.pl -t`.
+          pattern: "*.arf"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions for tracking.
+          pattern: "versions.yml"
+authors:
+  - "@atrigila"
+maintainers:
+  - "@atrigila"
diff --git a/modules/nf-core/mirdeep2/mapper/tests/main.nf.test b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test
new file mode 100644
index 00000000000..62e3e615abc
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test
@@ -0,0 +1,141 @@
+
+nextflow_process {
+
+    name "Test Process MIRDEEP2_MAPPER"
+    script "../main.nf"
+    process "MIRDEEP2_MAPPER"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "mirdeep2"
+    tag "bowtie/build"
+    tag "mirdeep2/mapper"
+    tag "seqkit/fq2fa"
+    tag "seqkit/replace"
+
+
+    setup {
+        run("BOWTIE_BUILD") {
+            script "../../../bowtie/build/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'genome_cel_cluster' ], // meta map
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        run("SEQKIT_FQ2FA") {
+            script "../../../seqkit/fq2fa/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'small_Clone1_N1' ], // meta map
+                    file('https://github.com/nf-core/test-datasets/raw/smrnaseq/testdata/trimmed/small_Clone1_N1.fastp.fastq.gz', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        run("SEQKIT_REPLACE") {
+            script "../../../seqkit/replace/main.nf"
+            config "./nextflow.config"
+            process {
+                """
+                input[0] = SEQKIT_FQ2FA.out.fasta
+                """
+            }
+        }
+
+    }
+
+    test("mirdeep2 - mapper - fasta celegans") {
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test_reads', single_end:false ], // meta map
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true)
+                ]
+                input[1] = BOWTIE_BUILD.out.index
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match() },
+
+                // md5sum not stable - IDs change while sequences are the same
+
+                // Assert TCACCGGGGGTACATCAGCTAA occurs once
+                { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("TCACCGGGGGTACATCAGCTAA") }.size() == 1 },
+
+                // Assert seq_347479_x287 occurs once
+                { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("seq_347479_x287") }.size() == 1 },
+
+                // Assert that specific content occurs 4 times
+                { assert file(process.out.outputs[0][2]).readLines().findAll { it.contains("21\t1\t21\ttcaccgggtgtaaatcagctt\tchrII:11534525-11540624\t21\t3535\t3555\ttcaccgggtgtaaatcagctt\t+\t0\tmmmmmmmmmmmmmmmmmmmmm") }.size() == 4 }
+            )
+        }
+
+    }
+
+    test("mirdeep2 - mapper - fasta smrnaseq") {
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = SEQKIT_REPLACE.out.fastx
+                input[1] = BOWTIE_BUILD.out.index
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+
+                // Assert reads occurs once
+                { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("TACCTGAGGTAGCAGGTTGTATAGTTGGGG") }.size() == 1 },
+
+                // Assert ID occurs once
+                { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("seq_996152_x1") }.size() == 1 }
+
+            )
+        }
+
+    }
+
+    test("mirdeep2 - fasta - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test_reads', single_end:false ], // meta map
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true)
+                ]
+                input[1] = BOWTIE_BUILD.out.index
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap
new file mode 100644
index 00000000000..4c3697d976e
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap
@@ -0,0 +1,51 @@
+{
+    "mirdeep2 - fasta - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test_reads",
+                            "single_end": false
+                        },
+                        "test_reads_collapsed.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test_reads_reads_collapsed_vs_genome_cel_cluster_genome.arf:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,33c794292d6772d67fa8001439394614"
+                ],
+                "outputs": [
+                    [
+                        {
+                            "id": "test_reads",
+                            "single_end": false
+                        },
+                        "test_reads_collapsed.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test_reads_reads_collapsed_vs_genome_cel_cluster_genome.arf:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,33c794292d6772d67fa8001439394614"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-20T20:58:19.544297445"
+    },
+    "mirdeep2 - mapper - fasta celegans": {
+        "content": [
+            [
+                "versions.yml:md5,33c794292d6772d67fa8001439394614"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-17T17:41:05.101661825"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/mirdeep2/mapper/tests/nextflow.config b/modules/nf-core/mirdeep2/mapper/tests/nextflow.config
new file mode 100644
index 00000000000..ec097561e60
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mapper/tests/nextflow.config
@@ -0,0 +1,11 @@
+process {
+    withName: 'MIRDEEP2_MAPPER' {
+        ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v"
+    }
+
+    withName: 'SEQKIT_REPLACE' {
+        ext.args = "-p '\s.+'"
+        ext.suffix = "fasta"
+    }
+
+}
diff --git a/modules/nf-core/mirdeep2/mirdeep2/environment.yml b/modules/nf-core/mirdeep2/mirdeep2/environment.yml
new file mode 100644
index 00000000000..fafc6663255
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mirdeep2/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::mirdeep2=2.0.1.2"
diff --git a/modules/nf-core/mirdeep2/mirdeep2/main.nf b/modules/nf-core/mirdeep2/mirdeep2/main.nf
new file mode 100644
index 00000000000..66c859683ac
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mirdeep2/main.nf
@@ -0,0 +1,66 @@
+process MIRDEEP2_MIRDEEP2 {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0':
+        'biocontainers/mirdeep2:2.0.1.2--0' }"
+
+    input:
+    tuple val(meta), path(processed_reads), path(genome_mappings)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(mature), path(hairpin), path(mature_other_species)
+
+    output:
+    tuple val(meta), path("result*.{bed,csv,html}"), emit: outputs
+    path "versions.yml"                            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // WARN: the version is hard-coded here rather than queried from the tool; update it when bumping the container.
+    // NOTE(review): the conda/container pin is 2.0.1.2 while '2.0.1' is reported - confirm which is intended.
+    def VERSION = '2.0.1'
+    // Any reference FASTA passed as an empty value (e.g. []) is replaced by the literal "none" on the command line
+    def mature_species = mature ? "${mature}" : "none"
+    def mature_other = mature_other_species ? "${mature_other_species}": "none"
+    def precursors = hairpin ? "${hairpin}" : "none"
+
+    """
+    miRDeep2.pl \\
+        $processed_reads \\
+        $fasta \\
+        $genome_mappings \\
+        $mature_species \\
+        $mature_other \\
+        $precursors \\
+        $args
+
+    mv result_*.bed result_${prefix}.bed
+    mv result_*.csv result_${prefix}.csv
+    mv result_*.html result_${prefix}.html
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mirdeep2: \$(echo "$VERSION")
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '2.0.1'
+    """
+    touch result_${prefix}.html
+    touch result_${prefix}.bed
+    touch result_${prefix}.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mirdeep2: \$(echo "$VERSION")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/mirdeep2/mirdeep2/meta.yml b/modules/nf-core/mirdeep2/mirdeep2/meta.yml
new file mode 100644
index 00000000000..adf1410195b
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mirdeep2/meta.yml
@@ -0,0 +1,76 @@
+name: "mirdeep2_mirdeep2"
+description: |
+  miRDeep2 is a tool for identifying known and novel miRNAs in deep sequencing data by analyzing sequenced RNAs. It integrates the mapping of sequencing reads to the genome and predicts miRNA precursors and mature miRNAs.
+keywords:
+  - mirdeep2
+  - miRNA
+  - RNA sequencing
+tools:
+  - "mirdeep2":
+      description: |
+        miRDeep2 is a tool that discovers microRNA genes by analyzing sequenced RNAs.
+        It includes three main scripts: `miRDeep2.pl`, `mapper.pl`, and `quantifier.pl` for comprehensive miRNA detection and quantification.
+      homepage: "https://www.mdc-berlin.de/content/mirdeep2-documentation"
+      documentation: "https://www.mdc-berlin.de/content/mirdeep2-documentation"
+      tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2"
+      doi: "10.1093/nar/gkn491"
+      licence: ["GPL V3"]
+      identifier: biotools:mirdeep2
+
+input:
+  - - meta:
+        type: map
+        description: Groovy Map containing sample information, e.g. `[ id:'sample1',
+          single_end:false ]`
+    - processed_reads:
+        type: file
+        description: FASTA file containing the processed sequencing reads.
+        pattern: "*.fa"
+    - genome_mappings:
+        type: file
+        description: ARF format file with mapped reads to the genome.
+        pattern: "*.arf"
+  - - meta2:
+        type: map
+        description: Groovy Map for genome FASTA file metadata, e.g. `[ id:'genome']`
+    - fasta:
+        type: file
+        description: FASTA file of the corresponding genome.
+        pattern: "*.fa"
+  - - meta3:
+        type: map
+        description: Groovy Map for miRNA metadata, e.g. `[ id:'mirbase', single_end:false
+          ]`
+    - mature:
+        type: file
+        description: FASTA file containing known mature miRNAs of the species being
+          analyzed. Pass `[]` to omit it (miRDeep2 is then given `none`).
+        pattern: "*.fa"
+    - hairpin:
+        type: file
+        description: FASTA file containing hairpin sequences (miRNA precursors); `[]` to omit.
+        pattern: "*.fa"
+    - mature_other_species:
+        type: file
+        description: FASTA file containing known mature miRNAs of other species; `[]` to omit.
+        pattern: "*.fa"
+output:
+  - outputs:
+      - meta:
+          type: map
+          description: Groovy Map containing sample information e.g. `[ id:'sample1',
+            single_end:false ]`
+      - "result*.{bed,csv,html}":
+          type: file
+          description: Output files, including BED, CSV, and HTML results files with an
+            overview of detected miRNAs.
+          pattern: "result*.{bed,csv,html}"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@atrigila"
+maintainers:
+  - "@atrigila"
diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test
new file mode 100644
index 00000000000..b7b73ec123d
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test
@@ -0,0 +1,111 @@
+nextflow_process {
+
+    name "Test Process MIRDEEP2_MIRDEEP2"
+    script "../main.nf"
+    process "MIRDEEP2_MIRDEEP2"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "mirdeep2"
+    tag "mirdeep2/mirdeep2"
+    tag "bowtie/build"
+    tag "mirdeep2/mapper"
+
+
+    setup {
+        run("BOWTIE_BUILD") {
+            script "../../../bowtie/build/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'genome_cel_cluster' ], // meta map
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        run("MIRDEEP2_MAPPER") {
+            script "../../../mirdeep2/mapper/main.nf"
+            config "./nextflow.config"
+
+            process {
+                """
+                input[0] = [
+                    [ id:'test_reads', single_end:false ], // meta map
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true)
+                ]
+                input[1] = BOWTIE_BUILD.out.index
+                """
+            }
+        }
+
+    }
+
+    test("mirdeep2 - mirdeep2 - fa") {
+
+        when {
+            process {
+                """
+                input[0] = MIRDEEP2_MAPPER.out.outputs
+                input[1] = [
+                    [ id:'genome_cel_cluster' ], // meta map
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true)
+                ]
+                input[2] = [
+                    [ id:'hairpin_mature'], // meta map
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_this_species.fa', checkIfExists: true),
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/precursors_ref_this_species.fa', checkIfExists: true),
+                    []
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions,
+                    path(process.out.outputs.get(0).get(1)[2]).readLines().last().contains('</html>'),
+                    process.out.outputs.get(0).get(1)[0],
+                    path(process.out.outputs.get(0).get(1)[1]).readLines().first().contains('miRDeep2 score')
+                    ).match() },
+                // Assert .html
+                { assert path(process.out.outputs.get(0).get(1)[2]).readLines().last().contains('</html>') }
+            )
+        }
+
+    }
+
+    test("mirdeep - mirdeep2 - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = MIRDEEP2_MAPPER.out.outputs
+                input[1] = [
+                    [ id:'genome_cel_cluster' ], // meta map
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true)
+                ]
+                input[2] = [
+                    [ id:'hairpin_mature'], // meta map
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_this_species.fa', checkIfExists: true),
+                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/precursors_ref_this_species.fa', checkIfExists: true),
+                    []
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap
new file mode 100644
index 00000000000..f8ffcf019db
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap
@@ -0,0 +1,60 @@
+{
+    "mirdeep - mirdeep2 - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test_reads",
+                            "single_end": false
+                        },
+                        [
+                            "result_test_reads.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "result_test_reads.csv:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "result_test_reads.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,8984ad2f1e8bdd148da051e2e6b569bf"
+                ],
+                "outputs": [
+                    [
+                        {
+                            "id": "test_reads",
+                            "single_end": false
+                        },
+                        [
+                            "result_test_reads.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "result_test_reads.csv:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "result_test_reads.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,8984ad2f1e8bdd148da051e2e6b569bf"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-20T21:04:53.304188615"
+    },
+    "mirdeep2 - mirdeep2 - fa": {
+        "content": [
+            [
+                "versions.yml:md5,8984ad2f1e8bdd148da051e2e6b569bf"
+            ],
+            true,
+            "result_test_reads.bed:md5,ba5ef5782e40d7219ca064dd68865d74",
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-23T15:08:50.660562955"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/nextflow.config b/modules/nf-core/mirdeep2/mirdeep2/tests/nextflow.config
new file mode 100644
index 00000000000..6a33ae05eed
--- /dev/null
+++ b/modules/nf-core/mirdeep2/mirdeep2/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: 'MIRDEEP2_MAPPER' {
+        ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v"
+    }
+}
diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf
new file mode 100644
index 00000000000..f8c3da93113
--- /dev/null
+++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf
@@ -0,0 +1,37 @@
+include { SEQKIT_FQ2FA      } from '../../../modules/nf-core/seqkit/fq2fa/main'
+include { SEQKIT_REPLACE    } from '../../../modules/nf-core/seqkit/replace/main'
+include { MIRDEEP2_MAPPER   } from '../../../modules/nf-core/mirdeep2/mapper/main'
+include { MIRDEEP2_MIRDEEP2 } from '../../../modules/nf-core/mirdeep2/mirdeep2/main'
+
+//
+// Detect miRNAs from FASTQ reads: convert to FASTA, rewrite the records with seqkit replace,
+// prepare and map the reads with mapper.pl against a pre-built Bowtie index, then run miRDeep2.pl
+//
+workflow FASTQ_FIND_MIRNA_MIRDEEP2 {
+
+    take:
+    ch_reads                // channel: [ val(meta), fastq ]
+    ch_genome_fasta         // channel: [ val(meta), genome_fasta ]
+    ch_bowtie_index         // channel: [ val(meta), index ]
+    ch_mirna_mature_hairpin // channel: [ val(meta), mature_mirna, hairpin_mirna, mature_other_species_mirna ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    SEQKIT_FQ2FA ( ch_reads )
+    ch_versions = ch_versions.mix(SEQKIT_FQ2FA.out.versions)
+
+    SEQKIT_REPLACE ( SEQKIT_FQ2FA.out.fasta )
+    ch_versions = ch_versions.mix(SEQKIT_REPLACE.out.versions)
+
+    MIRDEEP2_MAPPER ( SEQKIT_REPLACE.out.fastx, ch_bowtie_index )
+    ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions)
+
+    MIRDEEP2_MIRDEEP2 ( MIRDEEP2_MAPPER.out.outputs, ch_genome_fasta, ch_mirna_mature_hairpin )
+    ch_versions = ch_versions.mix(MIRDEEP2_MIRDEEP2.out.versions)
+
+    emit:
+    outputs  = MIRDEEP2_MIRDEEP2.out.outputs // channel: [ val(meta), [ bed, csv, html ] ]
+    versions = ch_versions                   // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/meta.yml b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/meta.yml
new file mode 100644
index 00000000000..22a475b36f7
--- /dev/null
+++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/meta.yml
@@ -0,0 +1,56 @@
+name: "fastq_find_mirna_mirdeep2"
+description: |
+  This subworkflow identifies miRNAs from FASTQ files using miRDeep2. The workflow converts FASTQ to FASTA, processes and replaces any whitespace in sequence IDs, maps the reads against a pre-built Bowtie index of the genome using miRDeep2 mapper, and then identifies known and novel miRNAs.
+keywords:
+  - miRNA
+  - FASTQ
+  - FASTA
+  - Bowtie
+  - miRDeep2
+components:
+  - seqkit/fq2fa
+  - seqkit/replace
+  - mirdeep2/mapper
+  - mirdeep2/mirdeep2
+input:
+  - ch_reads:
+      type: file
+      description: |
+        The input channel containing the FASTQ files to process and identify miRNAs.
+        Structure: [ val(meta), path(fastq) ]
+      pattern: "*.fastq.gz"
+  - ch_genome_fasta:
+      type: file
+      description: |
+        The input channel containing the genome FASTA file passed to miRDeep2 (the genome the Bowtie index was built from).
+        Structure: [ val(meta), path(fasta) ]
+      pattern: "*.fa"
+  - ch_bowtie_index:
+      type: directory
+      description: |
+        The input channel containing the pre-built Bowtie index directory of the genome.
+        Structure: [ val(meta), path(index) ]
+  - ch_mirna_mature_hairpin:
+      type: file
+      description: |
+        The input channel containing the mature and hairpin miRNA sequences for miRNA identification.
+        Any of the FASTA entries may be `[]`, in which case miRDeep2 is given `none` for it.
+        Structure: [ val(meta), path(mature_fasta), path(hairpin_fasta), path(mature_other_species_fasta) ]
+      pattern: "*.fa"
+output:
+  - outputs:
+      type: file
+      description: |
+        The output channel containing the BED, CSV, and HTML files with the identified miRNAs.
+        Structure: [ val(meta), [ path(bed), path(csv), path(html) ] ]
+      pattern: "*.{bed,csv,html}"
+  - versions:
+      type: file
+      description: |
+        File containing software versions
+        Structure: [ path(versions.yml) ]
+      pattern: "versions.yml"
+authors:
+  - "@atrigila"
+maintainers:
+  - "@atrigila"
diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test
new file mode 100644
index 00000000000..13c10e52658
--- /dev/null
+++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test
@@ -0,0 +1,80 @@
+nextflow_workflow {
+
+    name "Test Subworkflow FASTQ_FIND_MIRNA_MIRDEEP2"
+    script "../main.nf"
+    workflow "FASTQ_FIND_MIRNA_MIRDEEP2"
+
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/fastq_find_mirna_mirdeep2"
+    tag "mirdeep2/mapper"
+    tag "mirdeep2/mirdeep2"
+    tag "seqkit/fq2fa"
+    tag "seqkit/replace"
+    tag "bowtie/build"
+
+
+    test("smrnaseq - fasta - single_end") {
+        config "./nextflow.config"
+
+        setup {
+            run("SEQKIT_REPLACE") {
+                script "modules/nf-core/seqkit/replace/main.nf"
+                config "./nextflow.config"
+
+                process {
+                    """
+                    input[0] = [
+                        [ id:'genome' ], // meta map
+                        file('https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa', checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+
+            run("BOWTIE_BUILD") {
+                script "modules/nf-core/bowtie/build/main.nf"
+                process {
+                    """
+                    input[0] = SEQKIT_REPLACE.out.fastx
+                    """
+                }
+            }
+        }
+
+        when {
+            workflow {
+                """
+                input[0] = [
+                    [ id:'small_Clone1_N1', single_end:false ], // meta map
+                    file('https://github.com/nf-core/test-datasets/raw/smrnaseq/testdata/trimmed/small_Clone1_N1.fastp.fastq.gz', checkIfExists: true)
+                ]
+
+                input[1] = SEQKIT_REPLACE.out.fastx
+
+                input[2] = BOWTIE_BUILD.out.index
+
+                input[3] = [
+                    [ id:'mirna_mature_hairpin'], // meta map
+                    file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_mature.fa', checkIfExists: true),
+                    file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_hairpin.fa', checkIfExists: true),
+                    []
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(workflow.out.versions,
+                    path(workflow.out.outputs.get(0).get(1)[2]).readLines().last().contains('</html>'),
+                    workflow.out.outputs.get(0).get(1)[0],
+                    path(workflow.out.outputs.get(0).get(1)[1]).readLines().first().contains('miRDeep2 score')
+                    ).match()},
+                // Assert .html
+                { assert path(workflow.out.outputs.get(0).get(1)[2]).readLines().last().contains('</html>') }
+            )
+        }
+    }
+}
diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap
new file mode 100644
index 00000000000..c48df3d7ed9
--- /dev/null
+++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap
@@ -0,0 +1,20 @@
+{
+    "smrnaseq - fasta - single_end": {
+        "content": [
+            [
+                "versions.yml:md5,10138b74aed5b2658c26ddf80ff391d5",
+                "versions.yml:md5,631c0428c28d5355f0e3e9bd790bd77d",
+                "versions.yml:md5,706a3f609ec9d66162576d93a6f6a67b",
+                "versions.yml:md5,756eee52b4a45f7a9effe33b1cd3cb92"
+            ],
+            true,
+            "result_small_Clone1_N1.bed:md5,98a74ac6dd16ee876e9a3f54d2695c88",
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-23T14:56:03.274059331"
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config
new file mode 100644
index 00000000000..ec097561e60
--- /dev/null
+++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config
@@ -0,0 +1,11 @@
+process {
+    withName: 'MIRDEEP2_MAPPER' {
+        ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v"
+    }
+
+    withName: 'SEQKIT_REPLACE' {
+        ext.args = "-p '\s.+'"
+        ext.suffix = "fasta"
+    }
+
+}