nf-core · yocra3 · Oct 29, 2024 · Oct 29, 2024 · Oct 29, 2024 · Oct 29, 2024
diff --git a/modules/nf-core/stimulus/splitcsv/environment.yml b/modules/nf-core/stimulus/splitcsv/environment.yml
@@ -0,0 +1,13 @@
+channels:
+  - conda-forge
+  - bioconda
+# NOTE(review): stimulus-py (the tool main.nf runs) is not listed below - confirm how the conda profile provides it
+dependencies:
+  - conda-forge::matplotlib=3.9.2
+  - conda-forge::pandas=2.2.3
+  - conda-forge::polars=1.9.0
+  - conda-forge::python=3.12
+  - conda-forge::pytorch=2.4.1
+  - conda-forge::ray-core=2.37.0
+  - conda-forge::safetensors=0.4.5
+  - conda-forge::scikit-learn=1.5.2
diff --git a/modules/nf-core/stimulus/splitcsv/main.nf b/modules/nf-core/stimulus/splitcsv/main.nf
@@ -0,0 +1,51 @@
+process STIMULUS_SPLITCSV {
+    tag "$meta.id"
+    label 'process_low'
+
+    // Runs `stimulus-split-csv` on data_csv with the split configuration in split_json
+    // and emits the written file as ${prefix}.csv (prefix defaults to meta.id).
+    // NOTE(review): no biocontainer is configured yet and the docker image below uses the
+    // mutable `latest` tag - pin a released tag once one is available.
+
+    conda "${moduleDir}/environment.yml"
+    container "docker.io/mathysgrapotte/stimulus-py:latest"
+
+    input:
+    tuple val(meta), path(split_json)
+    path(data_csv)
+
+    output:
+    tuple val(meta), path("${prefix}.csv"), emit: split_csv
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // args is appended to the command so extra CLI options can be supplied through task.ext.args
+    """
+    stimulus-split-csv -c ${data_csv} -j ${split_json} -o ${prefix}.csv ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Python: \$(python --version | cut -d ' ' -f 2)
+        Stimulus-py: \$( pip show stimulus-py | grep Version | sed 's/Version: //')
+    END_VERSIONS
+    """
+
+    stub:
+    // The stub only creates an empty placeholder output, so task.ext.args is not read here
+    prefix = task.ext.prefix ?: "${meta.id}"
+    def STIMULUS_VER = '0.0.9' // container not used in stub, change manually
+    """
+    touch ${prefix}.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Python: \$(python --version | cut -d ' ' -f 2)
+        Stimulus-py: ${STIMULUS_VER}
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/stimulus/splitcsv/meta.yml b/modules/nf-core/stimulus/splitcsv/meta.yml
@@ -0,0 +1,53 @@
+name: "stimulus_splitcsv"
+description: Split the data of a csv file according to a json split configuration.
+keywords:
+  - machine learning
+  - neural network
+  - unit test
+tools:
+  - "stimulus":
+      description: "Stochastic Testing and Input Manipulation for Unbiased Learning\
+        \ Systems"
+      homepage: "https://github.com/mathysgrapotte/stimulus-py"
+      documentation: "https://github.com/mathysgrapotte/stimulus-py"
+      tool_dev_url: "https://github.com/mathysgrapotte/stimulus-py"
+      licence: ["MIT"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - split_json:
+        type: file
+        description: json file with the data split configuration
+        pattern: "*.json"
+  - - data_csv:
+        type: file
+        description: csv file with the data for training the models
+        pattern: "*.csv"
+
+output:
+  - split_csv:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - ${prefix}.csv:
+          type: file
+          description: |
+            csv file written by stimulus-split-csv from the input
+            data csv and the json split configuration
+            e.g. `sample1.csv`
+          pattern: "*.csv"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@yocra3"
+maintainers:
+  - "@yocra3"
diff --git a/modules/nf-core/stimulus/splitcsv/tests/main.nf.test b/modules/nf-core/stimulus/splitcsv/tests/main.nf.test
@@ -0,0 +1,64 @@
+nextflow_process {
+    // nf-test suite for STIMULUS_SPLITCSV: one regular run and one stub run, both checked against a snapshot.
+    name "Test Process STIMULUS_SPLITCSV"
+    script "../main.nf"
+    process "STIMULUS_SPLITCSV"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "stimulus"
+    tag "stimulus/splitcsv"
+
+    test("split_csv") {
+        // input[0] is the [ meta, split json ] tuple and input[1] the data csv.
+       when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test'],
+                      file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/experiment_config.json', checkIfExists: true)
+                ]
+                input[1] = [
+                     file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/input_data_with_split.csv', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("split_csv - stub") {
+        // Same inputs as the regular test, but executed with the -stub option.
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test'],
+                      file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/experiment_config.json', checkIfExists: true)
+                ]
+                input[1] = [
+                     file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/input_data_with_split.csv', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+    // NOTE(review): test data is reached via '../../deepmodeloptim' relative to modules_testdata_base_path - confirm this stays valid if that base path changes.
+}
diff --git a/modules/nf-core/stimulus/splitcsv/tests/main.nf.test.snap b/modules/nf-core/stimulus/splitcsv/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+    "split_csv - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,96f455280ca86779dab4d17bbeb38372"
+                ],
+                "split_csv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,96f455280ca86779dab4d17bbeb38372"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-10-29T14:30:40.69771525"
+    },
+    "split_csv": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.csv:md5,adcda781dba6449230123ef6620ddb4d"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,96f455280ca86779dab4d17bbeb38372"
+                ],
+                "split_csv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.csv:md5,adcda781dba6449230123ef6620ddb4d"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,96f455280ca86779dab4d17bbeb38372"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-10-29T14:30:34.387240108"
+    }
+}