Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Stimulus CSV #6884

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
13 changes: 13 additions & 0 deletions modules/nf-core/stimulus/splitcsv/environment.yml
alessiovignoli marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# Conda environment for the stimulus/splitcsv module.
# NOTE(review): the module's main.nf runs `stimulus-split-csv` and reports its version via
# `pip show stimulus-py`, but stimulus-py is not listed below - confirm how the conda
# profile is expected to obtain the tool.
channels:
  - conda-forge
  - bioconda

dependencies:
  # All packages are version-pinned and taken explicitly from the conda-forge channel.
  - conda-forge::matplotlib=3.9.2
  - conda-forge::pandas=2.2.3
  - conda-forge::polars=1.9.0
  - conda-forge::python=3.12
  - conda-forge::pytorch=2.4.1
  - conda-forge::ray-core=2.37.0
  - conda-forge::safetensors=0.4.5
  - conda-forge::scikit-learn=1.5.2
51 changes: 51 additions & 0 deletions modules/nf-core/stimulus/splitcsv/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Runs `stimulus-split-csv` on a CSV data file using a JSON split configuration.
//
// Inputs:
//   meta       - Groovy map with sample information; `meta.id` is used as the task tag
//                and as the default output prefix.
//   split_json - JSON file passed to the tool with `-j`.
//   data_csv   - CSV file passed to the tool with `-c`.
// Outputs:
//   split_csv  - tuple of meta and the `${prefix}.csv` file written via `-o`.
//   versions   - versions.yml recording the Python and stimulus-py versions.
process STIMULUS_SPLITCSV {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    // NOTE(review): the mutable `latest` tag makes runs non-reproducible. Pin a released
    // image tag when one is available and keep STIMULUS_VER in the stub in sync with it.
    container "docker.io/mathysgrapotte/stimulus-py:latest"

    input:
    tuple val(meta), path(split_json)
    path(data_csv)

    output:
    tuple val(meta), path("${prefix}.csv"), emit: split_csv
    path "versions.yml"                   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options supplied through `ext.args`; empty by default.
    def args = task.ext.args ?: ''
    // `prefix` is assigned without `def` because the output block above also references it.
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    stimulus-split-csv \\
        -c ${data_csv} \\
        -j ${split_json} \\
        -o ${prefix}.csv \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        Python: \$(python --version | cut -d ' ' -f 2)
        Stimulus-py: \$( pip show stimulus-py | grep '^Version:' | sed 's/^Version: //')
    END_VERSIONS
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"
    def STIMULUS_VER = '0.0.9' // container not used in stub, change manually
    """
    touch ${prefix}.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        Python: \$(python --version | cut -d ' ' -f 2)
        Stimulus-py: ${STIMULUS_VER}
    END_VERSIONS
    """
}
53 changes: 53 additions & 0 deletions modules/nf-core/stimulus/splitcsv/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Module metadata for stimulus/splitcsv: describes the tool and the process input/output channels.
name: "stimulus_splitcsv"
description: Split the data in a CSV file according to a JSON split configuration using stimulus-split-csv.
keywords:
  - machine learning
  - neural network
  - unit test
tools:
  - "stimulus":
      description: "Stochastic Testing and Input Manipulation for Unbiased Learning Systems"
      homepage: "https://github.com/mathysgrapotte/stimulus-py"
      documentation: "https://github.com/mathysgrapotte/stimulus-py"
      tool_dev_url: "https://github.com/mathysgrapotte/stimulus-py"
      licence: ["MIT"]
      identifier: ""
input:
  # First input channel: tuple val(meta), path(split_json)
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1' ]`
    - split_json:
        type: file
        description: json file with the data split configuration
        pattern: "*.json"
  # Second input channel: path(data_csv)
  - - data_csv:
        type: file
        description: csv file with the data for training the models
        pattern: "*.csv"

output:
  - split_csv:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      # The emitted path is a file, not a map; it is the file written by `-o ${prefix}.csv`.
      - ${prefix}.csv:
          type: file
          description: |
            csv file written by stimulus-split-csv from `data_csv`
            using the split configuration in `split_json`
          pattern: "*.csv"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"
authors:
  - "@yocra3"
maintainers:
  - "@yocra3"
64 changes: 64 additions & 0 deletions modules/nf-core/stimulus/splitcsv/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// nf-test suite for STIMULUS_SPLITCSV: one regular run and one `-stub` run.
// Each test asserts that the process succeeds and that its outputs match the stored snapshot.
nextflow_process {

    name "Test Process STIMULUS_SPLITCSV"
    script "../main.nf"
    process "STIMULUS_SPLITCSV"

    tag "modules"
    tag "modules_nfcore"
    tag "stimulus"
    tag "stimulus/splitcsv"

    // Regular run.
    // input[0] is the (meta, split_json) tuple; input[1] carries the data CSV.
    // Both fixtures are resolved relative to params.modules_testdata_base_path,
    // stepping up two levels into deepmodeloptim/testdata/dna_experiment.
    test("split_csv") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/experiment_config.json', checkIfExists: true)
                ]
                input[1] = [
                    file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/input_data_with_split.csv', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    // Stub run: same inputs as above, executed with the `-stub` option.
    test("split_csv - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/experiment_config.json', checkIfExists: true)
                ]
                input[1] = [
                    file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/input_data_with_split.csv', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
68 changes: 68 additions & 0 deletions modules/nf-core/stimulus/splitcsv/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{
"split_csv - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,96f455280ca86779dab4d17bbeb38372"
],
"split_csv": [
[
{
"id": "test"
},
"test.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,96f455280ca86779dab4d17bbeb38372"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.3"
},
"timestamp": "2024-10-29T14:30:40.69771525"
},
"split_csv": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.csv:md5,adcda781dba6449230123ef6620ddb4d"
]
],
"1": [
"versions.yml:md5,96f455280ca86779dab4d17bbeb38372"
],
"split_csv": [
[
{
"id": "test"
},
"test.csv:md5,adcda781dba6449230123ef6620ddb4d"
]
],
"versions": [
"versions.yml:md5,96f455280ca86779dab4d17bbeb38372"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.3"
},
"timestamp": "2024-10-29T14:30:34.387240108"
}
}
Loading