From 40cf7e51cf9b1ed3eb53d8e02d3c737c645c4145 Mon Sep 17 00:00:00 2001
From: jonasscheid <jonas.scheid@uni-tuebingen.de>
Date: Tue, 14 May 2024 11:49:25 +0000
Subject: [PATCH 01/12] Add timstof tests and archive support

---
 .github/workflows/ci.yml                      |  2 +-
 CHANGELOG.md                                  |  2 +
 conf/modules.config                           | 26 ++++++-
 conf/test_timstof.config                      | 39 ++++++++++
 modules.json                                  |  5 ++
 modules/local/untar/environment.yml           | 11 +++
 modules/local/untar/main.nf                   | 54 +++++++++++++
 modules/local/untar/meta.yml                  | 46 +++++++++++
 modules/local/unzip/environment.yml           |  7 ++
 modules/local/unzip/main.nf                   | 37 +++++++++
 modules/local/unzip/meta.yml                  | 42 ++++++++++
 modules/nf-core/gunzip/environment.yml        |  7 ++
 modules/nf-core/gunzip/main.nf                | 48 ++++++++++++
 modules/nf-core/gunzip/meta.yml               | 39 ++++++++++
 modules/nf-core/gunzip/tests/main.nf.test     | 36 +++++++++
 .../nf-core/gunzip/tests/main.nf.test.snap    | 31 ++++++++
 modules/nf-core/gunzip/tests/tags.yml         |  2 +
 nextflow.config                               |  1 +
 subworkflows/local/prepare_spectra.nf         | 76 +++++++++++++++++++
 .../utils_nfcore_mhcquant_pipeline/main.nf    | 11 ++-
 workflows/mhcquant.nf                         | 47 ++----------
 21 files changed, 527 insertions(+), 42 deletions(-)
 create mode 100644 conf/test_timstof.config
 create mode 100644 modules/local/untar/environment.yml
 create mode 100644 modules/local/untar/main.nf
 create mode 100644 modules/local/untar/meta.yml
 create mode 100644 modules/local/unzip/environment.yml
 create mode 100644 modules/local/unzip/main.nf
 create mode 100644 modules/local/unzip/meta.yml
 create mode 100644 modules/nf-core/gunzip/environment.yml
 create mode 100644 modules/nf-core/gunzip/main.nf
 create mode 100644 modules/nf-core/gunzip/meta.yml
 create mode 100644 modules/nf-core/gunzip/tests/main.nf.test
 create mode 100644 modules/nf-core/gunzip/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/gunzip/tests/tags.yml
 create mode 100644 subworkflows/local/prepare_spectra.nf

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3a95a28a..314858d3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -58,7 +58,7 @@ jobs:
           # Test latest edge release of Nextflow
           - NXF_VER: ""
             NXF_EDGE: "1"
-        tests: ["test_mokapot", "test_percolator", "test_ionannotator"]
+        tests: ["test_mokapot", "test_percolator", "test_ionannotator", "test_timstof"]
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ea443d4d..d0ffcecd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### `Added`
 
 - Added MS²Rescore module with the underlying python CLI [#293](https://github.com/nf-core/mhcquant/pull/293)
+- Added support for handling various archive formats: `d|d.tar.gz|d.tar|d.zip|mzML.gz|raw|RAW|mzML` [#323](https://github.com/nf-core/mhcquant/pull/323)
+- Added test for timsTOF data [#323](https://github.com/nf-core/mhcquant/pull/323)
 
 ### `Fixed`
 
diff --git a/conf/modules.config b/conf/modules.config
index 59a325fe..bfae605b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -29,8 +29,32 @@ process {
 process {
 
     withName: 'THERMORAWFILEPARSER' {
-        ext.args = "-f 2"
+        ext.args   = "-f 2"
         ext.prefix = {"${raw.baseName}"}
+        publishDir = [
+            path: {"${params.outdir}"},
+            mode: params.publish_dir_mode,
+            enabled: false
+        ]
+    }
+
+    withName: 'UNTAR' {
+        publishDir  = [
+            path: {"${params.outdir}"},
+            mode: params.publish_dir_mode,
+            enabled: false
+        ]
+    }
+
+    withName: 'UNZIP' {
+        publishDir  = [
+            path: {"${params.outdir}"},
+            mode: params.publish_dir_mode,
+            enabled: false
+        ]
+    }
+
+    withName: 'GUNZIP' {
         publishDir  = [
             path: {"${params.outdir}"},
             mode: params.publish_dir_mode,
diff --git a/conf/test_timstof.config b/conf/test_timstof.config
new file mode 100644
index 00000000..067f4437
--- /dev/null
+++ b/conf/test_timstof.config
@@ -0,0 +1,39 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests on timsTOF data with MS²Rescore and Percolator
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/mhcquant -profile test_timstof,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Test timsTOF profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function with timsTOF data'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    input = '/mnt/volume/dev/mhcquant/samplesheets/samplesheet_timstof_test.tsv'
+    fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta'
+
+    // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
+    skip_quantification = true
+
+    // Search settings
+    activation_method        = 'CID'
+    prec_charge              = '1:4'
+    precursor_mass_tolerance = 20
+    fragment_mass_tolerance  = 0.01
+    spectrum_batch_size      = 1000
+
+    // MS²Rescore settings
+    feature_generators       = 'ms2pip'
+    ms2pip_model             = 'timsTOF'
+}
diff --git a/modules.json b/modules.json
index 955a9ad1..86c6a978 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "gunzip": {
+                        "branch": "master",
+                        "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208",
+                        "installed_by": ["modules"]
+                    },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
diff --git a/modules/local/untar/environment.yml b/modules/local/untar/environment.yml
new file mode 100644
index 00000000..0c9cbb10
--- /dev/null
+++ b/modules/local/untar/environment.yml
@@ -0,0 +1,11 @@
+name: untar
+
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+
+dependencies:
+  - conda-forge::grep=3.11
+  - conda-forge::sed=4.7
+  - conda-forge::tar=1.34
diff --git a/modules/local/untar/main.nf b/modules/local/untar/main.nf
new file mode 100644
index 00000000..140d1b06
--- /dev/null
+++ b/modules/local/untar/main.nf
@@ -0,0 +1,54 @@
+process UNTAR {
+    tag "$archive"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+        'nf-core/ubuntu:20.04' }"
+
+    input:
+    tuple val(meta), path(archive)
+
+    output:
+    tuple val(meta), path("*.d"), emit: untar
+    path "versions.yml"         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args  = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def prefix = task.ext.prefix ?: archive.baseName.replaceAll(/\.tar(\.gz)?$/, '')
+    // The archive is unpacked into "<prefix>" (archive name without .tar/.tar.gz), which has to match the "*.d" output glob
+    """
+    mkdir $prefix
+    depth=\$(tar -tf "${archive}" | grep '\\.d/\$' | head -n 1 | tr -cd '/' | wc -c)
+    # Strip the path components counted above so the content of the *.d folder is extracted directly into the prefix directory
+    tar \\
+        -C $prefix \\
+        -xavf \\
+        $args \\
+        $archive \\
+        --strip-components=\$depth \\
+        $args2
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: archive.baseName.replaceAll(/\.tar(\.gz)?$/, '') // same naming as the script block, so the stub directory also matches "*.d"
+    """
+    mkdir $prefix
+    touch ${prefix}/file.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/untar/meta.yml b/modules/local/untar/meta.yml
new file mode 100644
index 00000000..a9a2110f
--- /dev/null
+++ b/modules/local/untar/meta.yml
@@ -0,0 +1,46 @@
+name: untar
+description: Extract files.
+keywords:
+  - untar
+  - uncompress
+  - extract
+tools:
+  - untar:
+      description: |
+        Extract tar.gz files.
+      documentation: https://www.gnu.org/software/tar/manual/
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - archive:
+      type: file
+      description: Archive file to be extracted
+      pattern: "*.{tar}.{gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - untar:
+      type: directory
+      description: Directory containing contents of archive
+      pattern: "*/"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@matthdsm"
+  - "@jfy133"
+maintainers:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@matthdsm"
+  - "@jfy133"
diff --git a/modules/local/unzip/environment.yml b/modules/local/unzip/environment.yml
new file mode 100644
index 00000000..d3a535f1
--- /dev/null
+++ b/modules/local/unzip/environment.yml
@@ -0,0 +1,7 @@
+name: unzip
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::p7zip=16.02
diff --git a/modules/local/unzip/main.nf b/modules/local/unzip/main.nf
new file mode 100644
index 00000000..3917a0ee
--- /dev/null
+++ b/modules/local/unzip/main.nf
@@ -0,0 +1,37 @@
+process UNZIP {
+    tag "$archive"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/p7zip:16.02' :
+        'biocontainers/p7zip:16.02' }"
+
+    input:
+    tuple val(meta), path(archive)
+
+    output:
+    tuple val(meta), path("*.d"), emit: unzipped_archive
+    path "versions.yml"         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." }
+
+    // Extract straight into the task directory: the archive is expected to contain the "*.d" folder declared as output
+    """
+    7za \\
+        x \\
+        -o"." \\
+        $args \\
+        $archive
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/unzip/meta.yml b/modules/local/unzip/meta.yml
new file mode 100644
index 00000000..e8e377e2
--- /dev/null
+++ b/modules/local/unzip/meta.yml
@@ -0,0 +1,42 @@
+name: unzip
+description: Unzip ZIP archive files
+keywords:
+  - unzip
+  - decompression
+  - zip
+  - archiving
+tools:
+  - unzip:
+      description: p7zip is a quick port of 7z.exe and 7za.exe (command line version of 7zip, see www.7-zip.org) for Unix.
+      homepage: https://sourceforge.net/projects/p7zip/
+      documentation: https://sourceforge.net/projects/p7zip/
+      tool_dev_url: https://sourceforge.net/projects/p7zip
+      licence: ["LGPL-2.1-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - archive:
+      type: file
+      description: ZIP file
+      pattern: "*.zip"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - unzipped_archive:
+      type: directory
+      description: Directory contents of the unzipped archive
+      pattern: "${archive.baseName}/"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@jfy133"
+maintainers:
+  - "@jfy133"
diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml
new file mode 100644
index 00000000..25910b34
--- /dev/null
+++ b/modules/nf-core/gunzip/environment.yml
@@ -0,0 +1,7 @@
+name: gunzip
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::sed=4.7
diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf
new file mode 100644
index 00000000..468a6f28
--- /dev/null
+++ b/modules/nf-core/gunzip/main.nf
@@ -0,0 +1,48 @@
+process GUNZIP {
+    tag "$archive"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+        'nf-core/ubuntu:20.04' }"
+
+    input:
+    tuple val(meta), path(archive)
+
+    output:
+    tuple val(meta), path("$gunzip"), emit: gunzip
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    gunzip = archive.toString() - '.gz'
+    """
+    # Not calling gunzip itself because it creates files
+    # with the original group ownership rather than the
+    # default one for that user / the work directory
+    gzip \\
+        -cd \\
+        $args \\
+        $archive \\
+        > $gunzip
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    gunzip = archive.toString() - '.gz'
+    """
+    touch $gunzip
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml
new file mode 100644
index 00000000..231034f2
--- /dev/null
+++ b/modules/nf-core/gunzip/meta.yml
@@ -0,0 +1,39 @@
+name: gunzip
+description: Compresses and decompresses files.
+keywords:
+  - gunzip
+  - compression
+  - decompression
+tools:
+  - gunzip:
+      description: |
+        gzip is a file format and a software application used for file compression and decompression.
+      documentation: https://www.gnu.org/software/gzip/manual/gzip.html
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Optional groovy Map containing meta information
+        e.g. [ id:'test', single_end:false ]
+  - archive:
+      type: file
+      description: File to be compressed/uncompressed
+      pattern: "*.*"
+output:
+  - gunzip:
+      type: file
+      description: Compressed/uncompressed file
+      pattern: "*.*"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@jfy133"
+maintainers:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@jfy133"
diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test
new file mode 100644
index 00000000..6406008e
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test
@@ -0,0 +1,36 @@
+nextflow_process {
+
+    name "Test Process GUNZIP"
+    script "../main.nf"
+    process "GUNZIP"
+    tag "gunzip"
+    tag "modules_nfcore"
+    tag "modules"
+
+    test("Should run without failures") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                        [],
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+            { assert process.success },
+            { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap
new file mode 100644
index 00000000..720fd9ff
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test.snap
@@ -0,0 +1,31 @@
+{
+    "Should run without failures": {
+        "content": [
+            {
+                "0": [
+                    [
+                        [
+                            
+                        ],
+                        "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ],
+                "gunzip": [
+                    [
+                        [
+                            
+                        ],
+                        "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ]
+            }
+        ],
+        "timestamp": "2023-10-17T15:35:37.690477896"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml
new file mode 100644
index 00000000..fd3f6915
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/tags.yml
@@ -0,0 +1,2 @@
+gunzip:
+  - modules/nf-core/gunzip/**
diff --git a/nextflow.config b/nextflow.config
index 12736cd5..c6853513 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -232,6 +232,7 @@ profiles {
     test_mokapot      { includeConfig 'conf/test_mokapot.config'      }
     test_percolator   { includeConfig 'conf/test_percolator.config'   }
     test_ionannotator { includeConfig 'conf/test_ionannotator.config' }
+    test_timstof      { includeConfig 'conf/test_timstof.config'      }
     test_full         { includeConfig 'conf/test_full.config'         }
 }
 
diff --git a/subworkflows/local/prepare_spectra.nf b/subworkflows/local/prepare_spectra.nf
new file mode 100644
index 00000000..c4f60b7a
--- /dev/null
+++ b/subworkflows/local/prepare_spectra.nf
@@ -0,0 +1,76 @@
+/*
+ * Prepares the raw or compressed data holding spectra information for the subsequent database search.
+ */
+
+include { THERMORAWFILEPARSER    } from '../../modules/nf-core/thermorawfileparser/main'
+include { UNTAR                  } from '../../modules/local/untar/main'
+include { UNZIP                  } from '../../modules/local/unzip/main'
+include { TDF2MZML               } from '../../modules/local/tdf2mzml'
+include { GUNZIP                 } from '../../modules/nf-core/gunzip/main'
+include { OPENMS_PEAKPICKERHIRES } from '../../modules/nf-core/openms/peakpickerhires/main'
+
+workflow PREPARE_SPECTRA {
+    take:
+        ch_samplesheet // channel: [ val(meta), path(ms_file) ]; meta.ext is the (compound) extension of ms_file
+
+    main:
+        ch_versions = Channel.empty()
+
+        // Route every input by its extension. Compare case-insensitively because meta.ext can keep the case of
+        // the file name (e.g. 'mzML', 'RAW'); anything else lands in 'other', which is not used in this subworkflow.
+        ch_samplesheet
+        .branch {
+            meta, filename ->
+                raw : meta.ext.toLowerCase() == 'raw'
+                    return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
+                mzml : meta.ext.toLowerCase() == 'mzml'
+                    return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
+                mzml_gz : meta.ext.toLowerCase() == 'mzml.gz'
+                    return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
+                d : meta.ext.toLowerCase() == 'd'
+                    return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
+                d_tar : meta.ext.toLowerCase() in ['d.tar', 'd.tar.gz']
+                    return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
+                d_zip : meta.ext.toLowerCase() == 'd.zip'
+                    return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
+                other : true }
+        .set { branched_ms_files }
+
+        // Raw file conversion
+        THERMORAWFILEPARSER(branched_ms_files.raw)
+        ch_versions = ch_versions.mix(THERMORAWFILEPARSER.out.versions)
+
+        // Decompress timsTOF archive for data conversion
+        UNTAR(branched_ms_files.d_tar)
+        ch_versions = ch_versions.mix(UNTAR.out.versions)
+
+        UNZIP(branched_ms_files.d_zip)
+        ch_versions = ch_versions.mix(UNZIP.out.versions)
+
+        ch_tdf_files = branched_ms_files.d.mix(UNTAR.out.untar, UNZIP.out.unzipped_archive)
+
+        // timsTOF data conversion
+        TDF2MZML(ch_tdf_files)
+        ch_versions = ch_versions.mix(TDF2MZML.out.versions)
+
+        // Gunzip mzML files
+        GUNZIP(branched_ms_files.mzml_gz)
+        ch_versions = ch_versions.mix(GUNZIP.out.versions)
+
+        // Collect all mzML files: the ones provided as-is plus every decompressed or converted file
+        ch_ms_files = branched_ms_files.mzml
+                        .mix(GUNZIP.out.gunzip, THERMORAWFILEPARSER.out.spectra, TDF2MZML.out.mzml)
+
+        // Optional: Run Peak Picking as Preprocessing
+        if (params.run_centroidisation) {
+            OPENMS_PEAKPICKERHIRES(ch_ms_files)
+            ch_versions = ch_versions.mix(OPENMS_PEAKPICKERHIRES.out.versions)
+            ch_mzml_file = OPENMS_PEAKPICKERHIRES.out.mzml
+        } else {
+            ch_mzml_file = ch_ms_files
+        }
+
+    emit:
+        mzml = ch_mzml_file       // channel: [ val(meta), path(mzML) ]
+        versions = ch_versions    // channel: versions.yml files of all modules run here
+}
diff --git a/subworkflows/local/utils_nfcore_mhcquant_pipeline/main.nf b/subworkflows/local/utils_nfcore_mhcquant_pipeline/main.nf
index ba086c31..dd5801b6 100644
--- a/subworkflows/local/utils_nfcore_mhcquant_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_mhcquant_pipeline/main.nf
@@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION {
         // get number of files per sample-condition
         .map { group_meta, metas, files -> [ group_meta, files.size()] }
         .combine( ch_input, by:0 )
-        .map { group_meta, group_count, meta, file -> [meta + ['group_count':group_count, 'spectra':file.baseName, 'ext':file.getExtension().toLowerCase()], file] }
+        .map { group_meta, group_count, meta, file -> [meta + ['group_count':group_count, 'spectra':file.baseName.tokenize('.')[0], 'ext':getCustomExtension(file)], file] }
         .set { ch_samplesheet }
 
     //
@@ -161,6 +161,15 @@ def validateInputSamplesheet(input) {
     return [ metas[0], fastqs ]
 }
 
+def getCustomExtension(file) {
+    // Returns the known (possibly compound) extension of `file` in its original case, e.g. 'd.tar.gz' or 'mzML';
+    // any other file falls back to its lower-cased last extension.
+    def name = file.getName()
+    def matcher = name =~ /.*\.(d\.tar\.gz|d\.tar|d\.zip|mzML\.gz|raw|RAW|mzML|d)$/
+    // Use the captured group instead of "everything after the first dot" so file names containing dots still work
+    return matcher.matches() ? matcher.group(1) : file.getExtension().toLowerCase()
+}
+
 //
 // Generate methods description for MultiQC
 //
diff --git a/workflows/mhcquant.nf b/workflows/mhcquant.nf
index fe16d1f4..911f729e 100644
--- a/workflows/mhcquant.nf
+++ b/workflows/mhcquant.nf
@@ -7,7 +7,7 @@
 //
 // MODULE: Loaded from modules/local/
 //
-include { TDF2MZML                   } from '../modules/local/tdf2mzml'
+
 include { OPENMS_FILEFILTER          } from '../modules/local/openms_filefilter'
 include { OPENMS_COMETADAPTER        } from '../modules/local/openms_cometadapter'
 include { OPENMS_PEPTIDEINDEXER      } from '../modules/local/openms_peptideindexer'
@@ -21,7 +21,8 @@ include { OPENMS_MZTABEXPORTER       } from '../modules/local/openms_mztabexport
 //
 // SUBWORKFLOW: Loaded from subworkflows/local/
 //
-include { QUANT            } from '../subworkflows/local/quant'
+include { PREPARE_SPECTRA } from '../subworkflows/local/prepare_spectra'
+include { QUANT           } from '../subworkflows/local/quant'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -32,9 +33,7 @@ include { QUANT            } from '../subworkflows/local/quant'
 //
 // MODULE: Installed directly from nf-core/modules
 //
-include { THERMORAWFILEPARSER                        } from '../modules/nf-core/thermorawfileparser/main'
 include { OPENMS_DECOYDATABASE                       } from '../modules/nf-core/openms/decoydatabase/main'
-include { OPENMS_PEAKPICKERHIRES                     } from '../modules/nf-core/openms/peakpickerhires/main'
 include { OPENMS_IDMERGER                            } from '../modules/nf-core/openms/idmerger/main'
 include { OPENMS_IDSCORESWITCHER                     } from '../modules/nf-core/openms/idscoreswitcher/main.nf'
 include { OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE } from '../modules/nf-core/openms/idfilter/main'
@@ -60,17 +59,9 @@ workflow MHCQUANT {
     ch_versions = Channel.empty()
     ch_multiqc_files = Channel.empty()
 
-    ch_samplesheet
-        .branch {
-            meta, filename ->
-                raw : meta.ext == 'raw'
-                    return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
-                mzml : meta.ext == 'mzml'
-                    return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
-                tdf : meta.ext == 'd'
-                    return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
-                other : true }
-        .set { branched_ms_files }
+    // Prepare spectra files (Decompress archives, convert to mzML, centroid if specified)
+    PREPARE_SPECTRA(ch_samplesheet)
+    ch_versions = ch_versions.mix(PREPARE_SPECTRA.out.versions)
 
     // Decoy Database creation
     if (!params.skip_decoy_generation) {
@@ -83,35 +74,13 @@ workflow MHCQUANT {
         ch_decoy_db = ch_fasta.map{ meta, fasta -> [fasta] }
     }
 
-    // Initialize channel for ms files that do not need to be converted
-    ch_ms_files = branched_ms_files.mzml
-
-    // Raw file conversion
-    THERMORAWFILEPARSER(branched_ms_files.raw)
-    ch_versions = ch_versions.mix(THERMORAWFILEPARSER.out.versions)
-    ch_ms_files = ch_ms_files.mix(THERMORAWFILEPARSER.out.spectra)
-
-    // timsTOF data conversion
-    TDF2MZML(branched_ms_files.tdf)
-    ch_versions = ch_versions.mix(TDF2MZML.out.versions)
-    ch_ms_files = ch_ms_files.mix(TDF2MZML.out.mzml)
-
-    // Optional: Run Peak Picking as Preprocessing
-    if (params.run_centroidisation) {
-        OPENMS_PEAKPICKERHIRES(ch_ms_files)
-        ch_versions = ch_versions.mix(OPENMS_PEAKPICKERHIRES.out.versions)
-        ch_mzml_file = OPENMS_PEAKPICKERHIRES.out.mzml
-    } else {
-        ch_mzml_file = ch_ms_files
-    }
-
     // Optionally clean up mzML files
     if (params.filter_mzml){
-        OPENMS_FILEFILTER(ch_mzml_file)
+        OPENMS_FILEFILTER(PREPARE_SPECTRA.out.mzml)
         ch_versions = ch_versions.mix(OPENMS_FILEFILTER.out.versions)
         ch_clean_mzml_file = OPENMS_FILEFILTER.out.cleaned_mzml
     } else {
-        ch_clean_mzml_file = ch_mzml_file
+        ch_clean_mzml_file = PREPARE_SPECTRA.out.mzml
     }
 
     // Run comet database search

From 0deda22c4669b15d18fb9ae2c0b2b6ef41154a44 Mon Sep 17 00:00:00 2001
From: jonasscheid <jonas.scheid@uni-tuebingen.de>
Date: Tue, 14 May 2024 11:59:33 +0000
Subject: [PATCH 02/12] update test config paths

---
 conf/test_ionannotator.config | 4 ++--
 conf/test_mokapot.config      | 4 ++--
 conf/test_percolator.config   | 4 ++--
 conf/test_timstof.config      | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/conf/test_ionannotator.config b/conf/test_ionannotator.config
index 635e4870..96a84143 100644
--- a/conf/test_ionannotator.config
+++ b/conf/test_ionannotator.config
@@ -20,8 +20,8 @@ params {
     max_time   = '6.h'
 
     // Input data
-    fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta'
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv'
+    input = params.pipelines_testdata_base_path + 'mhcquant/testdata/HepG2_sample_sheet.tsv'
+    fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606.fasta'
 
     // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
     skip_quantification = true
diff --git a/conf/test_mokapot.config b/conf/test_mokapot.config
index 3dd49220..c28fb0a9 100644
--- a/conf/test_mokapot.config
+++ b/conf/test_mokapot.config
@@ -20,8 +20,8 @@ params {
     max_time   = '6.h'
 
     // Input data
-    fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta'
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv'
+    input = params.pipelines_testdata_base_path + 'mhcquant/testdata/HepG2_sample_sheet.tsv'
+    fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606.fasta'
 
     // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
     skip_quantification = true
diff --git a/conf/test_percolator.config b/conf/test_percolator.config
index 1bf06c6e..d0f8da52 100644
--- a/conf/test_percolator.config
+++ b/conf/test_percolator.config
@@ -20,8 +20,8 @@ params {
     max_time   = '6.h'
 
     // Input data
-    fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta'
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv'
+    input = params.pipelines_testdata_base_path + 'mhcquant/testdata/HepG2_sample_sheet.tsv'
+    fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606.fasta'
 
     // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
     skip_quantification = true
diff --git a/conf/test_timstof.config b/conf/test_timstof.config
index 067f4437..f560d98b 100644
--- a/conf/test_timstof.config
+++ b/conf/test_timstof.config
@@ -20,8 +20,8 @@ params {
     max_time   = '6.h'
 
     // Input data
-    input = '/mnt/volume/dev/mhcquant/samplesheets/samplesheet_timstof_test.tsv'
-    fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta'
+    input = params.pipelines_testdata_base_path + 'mhcquant/testdata/sample_sheet_timstof.tsv'
+    fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606.fasta'
 
     // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
     skip_quantification = true

From 16218759e1b091980f692f29c0e7497ffd6385cb Mon Sep 17 00:00:00 2001
From: jonasscheid <jonas.scheid@uni-tuebingen.de>
Date: Tue, 14 May 2024 12:03:59 +0000
Subject: [PATCH 03/12] update readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f2104f08..44760b66 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@
 
 It was specifically designed to analyse immunopeptidomics data, which deals with the analysis of affinity purified, unspecifically cleaved peptides that have recently been discussed intensively in [the context of cancer vaccines](https://www.nature.com/articles/ncomms13404).
 
-The workflow is based on the OpenMS C++ framework for computational mass spectrometry. RAW files (mzML) serve as inputs and a database search (Comet) is performed based on a given input protein database. FDR rescoring is applied using Percolator based on a competitive target-decoy approach (reversed decoys). For label free quantification all input files undergo identification based retention time alignment (MapAlignerIdentification), and targeted feature extraction matching ids between runs (FeatureFinderIdentification). In addition, a variant calling file (vcf) can be specified to translate variants into proteins that will be included in the database search and binding predictions on specified alleles (alleles.tsv) using MHCFlurry (Class 1) or MHCNugget (Class 2) can be directly run on the output peptide lists. Moreover, if a vcf file was specified, neoepitopes will automatically be determined and binding predictions can also directly be predicted for them.
+The workflow is based on the OpenMS C++ framework for computational mass spectrometry. RAW files (mzML) serve as inputs and a database search (Comet) is performed based on a given input protein database. FDR rescoring is applied using Percolator based on a competitive target-decoy approach (reversed decoys). For label free quantification all input files undergo identification based retention time alignment (MapAlignerIdentification), and targeted feature extraction matching ids between runs (FeatureFinderIdentification).
 
 The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
 

From b8de2b0bab395d7ed59626e8a66caa39e066f7ea Mon Sep 17 00:00:00 2001
From: Jonas Scheid <jonas.scheid@uni-tuebingen.de>
Date: Wed, 15 May 2024 09:48:06 +0000
Subject: [PATCH 04/12] fix prettier

---
 .github/workflows/ci.yml              | 13 ++++++++++---
 subworkflows/local/prepare_spectra.nf |  1 +
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 314858d3..33b7f853 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - dev
+      - master
   pull_request:
   release:
     types: [published]
@@ -58,7 +59,7 @@ jobs:
           # Test latest edge release of Nextflow
           - NXF_VER: ""
             NXF_EDGE: "1"
-        tests: ["test_mokapot", "test_percolator", "test_ionannotator", "test_timstof"]
+        tests: ["test_mokapot", "test_percolator", "test_ionannotator", "test_timstof", "test_full"]
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
@@ -71,6 +72,12 @@ jobs:
         run: |
           wget -qO- get.nextflow.io | bash
           sudo mv nextflow /usr/local/bin/
-      - name: Run pipeline with profile ${{ matrix.tests }}
+      - name:
+          Run pipeline with profile ${{ matrix.tests }}
+          # Run test_timstof and test_full only on push against master, since they are time-consuming
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.tests }},docker --max_memory '6.GB' --max_cpus 2 --spectrum_batch_size 5000 --outdir ./results
+          if [[ "${{ github.ref }}" == "refs/heads/master" ]]; then
+            nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.tests }},docker --max_memory '6.GB' --max_cpus 2 --spectrum_batch_size 5000 --outdir ./results
+          elif [[ "${{ github.ref }}" != "refs/heads/master" && ("${{ matrix.tests }}" != "test_timstof" && "${{ matrix.tests }}" != "test_full") ]]; then
+            nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.tests }},docker --max_memory '6.GB' --max_cpus 2 --spectrum_batch_size 5000 --outdir ./results
+          fi
diff --git a/subworkflows/local/prepare_spectra.nf b/subworkflows/local/prepare_spectra.nf
index c4f60b7a..0e7bd14f 100644
--- a/subworkflows/local/prepare_spectra.nf
+++ b/subworkflows/local/prepare_spectra.nf
@@ -55,6 +55,7 @@ workflow PREPARE_SPECTRA {
 
         // Gunzip mzML files
         GUNZIP(branched_ms_files.mzml_gz)
+        ch_versions = ch_versions.mix(GUNZIP.out.versions)
         // Initialize channel for ms files that do not need to be converted
         ch_ms_files = branched_ms_files.mzml
                         .mix(GUNZIP.out.gunzip,

From 11c1f1940075229c729608edd4d2c2f7e6b2f187 Mon Sep 17 00:00:00 2001
From: Jonas Scheid <jonas.scheid@uni-tuebingen.de>
Date: Wed, 15 May 2024 09:54:31 +0000
Subject: [PATCH 05/12] switch from tab to space tabbing

---
 conf/test_timstof.config | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/conf/test_timstof.config b/conf/test_timstof.config
index f560d98b..30d0b172 100644
--- a/conf/test_timstof.config
+++ b/conf/test_timstof.config
@@ -27,10 +27,10 @@ params {
     skip_quantification = true
 
     // Search settings
-	activation_method        = 'CID'
-	prec_charge              = '1:4'
-	precursor_mass_tolerance = 20
-	fragment_mass_tolerance  = 0.01
+    activation_method        = 'CID'
+    prec_charge              = '1:4'
+    precursor_mass_tolerance = 20
+    fragment_mass_tolerance  = 0.01
     spectrum_batch_size      = 1000
 
     // MS²Rescore settings

From 65980b1e0a1642ac94e2671c9fc8daa33353ec06 Mon Sep 17 00:00:00 2001
From: Jonas Scheid <jonas.scheid@uni-tuebingen.de>
Date: Wed, 15 May 2024 12:13:53 +0000
Subject: [PATCH 06/12] alter ci to run test_timstof and test_full only on push
 against main

---
 .github/workflows/ci.yml | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 33b7f853..a92c3c3d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,7 +4,7 @@ on:
   push:
     branches:
       - dev
-      - master
+      - main
   pull_request:
   release:
     types: [published]
@@ -59,7 +59,7 @@ jobs:
           # Test latest edge release of Nextflow
           - NXF_VER: ""
             NXF_EDGE: "1"
-        tests: ["test_mokapot", "test_percolator", "test_ionannotator", "test_timstof", "test_full"]
+        tests: ${{ github.ref == 'refs/heads/main' && ['test_mokapot', 'test_percolator', 'test_ionannotator', 'test_timstof', 'test_full'] || ['test_mokapot', 'test_percolator', 'test_ionannotator'] }}
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
@@ -72,12 +72,6 @@ jobs:
         run: |
           wget -qO- get.nextflow.io | bash
           sudo mv nextflow /usr/local/bin/
-      - name:
-          Run pipeline with profile ${{ matrix.tests }}
-          # Run test_timstof and test_full only on push against master, since they are time-consuming
+      - name: Run pipeline with profile ${{ matrix.tests }}
         run: |
-          if [[ "${{ github.ref }}" == "refs/heads/master" ]]; then
-            nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.tests }},docker --max_memory '6.GB' --max_cpus 2 --spectrum_batch_size 5000 --outdir ./results
-          elif [[ "${{ github.ref }}" != "refs/heads/master" && ("${{ matrix.tests }}" != "test_timstof" && "${{ matrix.tests }}" != "test_full") ]]; then
-            nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.tests }},docker --max_memory '6.GB' --max_cpus 2 --spectrum_batch_size 5000 --outdir ./results
-          fi
+          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.tests }},docker --max_memory '6.GB' --max_cpus 2 --spectrum_batch_size 5000 --outdir ./results

From 95cc7e01e74a87cb9c93d88c8f5c50a61a4097ec Mon Sep 17 00:00:00 2001
From: Jonas Scheid <jonas.scheid@uni-tuebingen.de>
Date: Wed, 15 May 2024 12:36:57 +0000
Subject: [PATCH 07/12] create third workflow only running on push against main

---
 .github/workflows/ci.yml | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a92c3c3d..424c1426 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,7 +4,7 @@ on:
   push:
     branches:
       - dev
-      - main
+      - master
   pull_request:
   release:
     types: [published]
@@ -59,7 +59,41 @@ jobs:
           # Test latest edge release of Nextflow
           - NXF_VER: ""
             NXF_EDGE: "1"
-        tests: ${{ github.ref == 'refs/heads/main' && ['test_mokapot', 'test_percolator', 'test_ionannotator', 'test_timstof', 'test_full'] || ['test_mokapot', 'test_percolator', 'test_ionannotator'] }}
+        tests: ["test_mokapot", "test_percolator", "test_ionannotator"]
+    steps:
+      - name: Check out pipeline code
+        uses: actions/checkout@v2
+      - name: Install Nextflow
+        env:
+          NXF_VER: ${{ matrix.NXF_VER }}
+          # Uncomment only if the edge release is more recent than the latest stable release
+          # See https://github.com/nextflow-io/nextflow/issues/2467
+          # NXF_EDGE: ${{ matrix.NXF_EDGE }}
+        run: |
+          wget -qO- get.nextflow.io | bash
+          sudo mv nextflow /usr/local/bin/
+      - name: Run pipeline with profile ${{ matrix.tests }}
+        run: |
+          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.tests }},docker --max_memory '6.GB' --max_cpus 2 --spectrum_batch_size 5000 --outdir ./results
+
+  # Define a second workflow only against main with additional tests: test_timstof and test_full
+  profile_main:
+    name: Run profile tests and additional full tests
+    if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'nf-core/mhcquant' }}
+    runs-on: ubuntu-latest
+    env:
+      NXF_VER: ${{ matrix.nxf_ver }}
+      NXF_ANSI_LOG: false
+    strategy:
+      matrix:
+        include:
+          # Test pipeline minimum Nextflow version
+          - NXF_VER: "23.04.0"
+            NXF_EDGE: ""
+          # Test latest edge release of Nextflow
+          - NXF_VER: ""
+            NXF_EDGE: "1"
+        tests: ["test_timstof", "test_full"]
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2

From 99c420162f3fa87e98e2bcbef9428982990b0034 Mon Sep 17 00:00:00 2001
From: Jonas Scheid <jonas.scheid@uni-tuebingen.de>
Date: Tue, 28 May 2024 12:43:44 +0000
Subject: [PATCH 08/12] remove full test from ci again, not enough resources
 on GH

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 424c1426..87c109d6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -93,7 +93,7 @@ jobs:
           # Test latest edge release of Nextflow
           - NXF_VER: ""
             NXF_EDGE: "1"
-        tests: ["test_timstof", "test_full"]
+        tests: ["test_timstof"]
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2

From 16f3e874e50878cb08256e96a349ee26031f3693 Mon Sep 17 00:00:00 2001
From: Jonas Scheid <jonas.scheid@uni-tuebingen.de>
Date: Tue, 28 May 2024 12:45:12 +0000
Subject: [PATCH 09/12] rename --skip_quantification to --quantify and set ID
 mode to default

---
 conf/modules.config           |  2 +-
 conf/test.config              |  2 --
 conf/test_full.config         |  3 +++
 conf/test_ionannotator.config |  3 +--
 conf/test_mokapot.config      |  3 +--
 conf/test_percolator.config   |  3 +--
 conf/test_timstof.config      |  3 ---
 docs/output.md                | 11 +++++++++--
 nextflow.config               |  2 +-
 nextflow_schema.json          |  6 +++---
 subworkflows/local/quant.nf   |  2 +-
 workflows/mhcquant.nf         |  2 +-
 12 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index bfae605b..2e2ddfb6 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -132,7 +132,7 @@ process {
 }
 
 process {
-    if (!params.skip_quantification) {
+    if (params.quantify) {
         withName: 'NFCORE_MHCQUANT:MHCQUANT:QUANT:OPENMS_IDSCORESWITCHER' {
             ext.args   = [
                 "-new_score COMET:xcorr",
diff --git a/conf/test.config b/conf/test.config
index 15b7695c..244fa06e 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -23,6 +23,4 @@ params {
     input = params.pipelines_testdata_base_path + 'mhcquant/testdata/HepG2_sample_sheet.tsv'
     fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606.fasta'
 
-    // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
-    skip_quantification = true
 }
diff --git a/conf/test_full.config b/conf/test_full.config
index 3c7494c4..11e58919 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -21,4 +21,7 @@ params {
     // Test multiple modifications
     fixed_mods    = 'Oxidation (M),Carbamidomethyl (C)'
     variable_mods = 'Oxidation (M),Carbamidomethyl (C)'
+
+    // Pipeline settings
+    quantify = true
 }
diff --git a/conf/test_ionannotator.config b/conf/test_ionannotator.config
index 96a84143..cdb37661 100644
--- a/conf/test_ionannotator.config
+++ b/conf/test_ionannotator.config
@@ -23,8 +23,7 @@ params {
     input = params.pipelines_testdata_base_path + 'mhcquant/testdata/HepG2_sample_sheet.tsv'
     fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606.fasta'
 
-    // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
-    skip_quantification = true
+    // Pipeline settings
     annotate_ions       = true
     use_x_ions          = true
     use_z_ions          = true
diff --git a/conf/test_mokapot.config b/conf/test_mokapot.config
index c28fb0a9..80eee016 100644
--- a/conf/test_mokapot.config
+++ b/conf/test_mokapot.config
@@ -23,7 +23,6 @@ params {
     input = params.pipelines_testdata_base_path + 'mhcquant/testdata/HepG2_sample_sheet.tsv'
     fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606.fasta'
 
-    // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
-    skip_quantification = true
+    // Pipeline settings
     rescoring_engine    = 'mokapot'
 }
diff --git a/conf/test_percolator.config b/conf/test_percolator.config
index d0f8da52..f64495b8 100644
--- a/conf/test_percolator.config
+++ b/conf/test_percolator.config
@@ -23,7 +23,6 @@ params {
     input = params.pipelines_testdata_base_path + 'mhcquant/testdata/HepG2_sample_sheet.tsv'
     fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606.fasta'
 
-    // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
-    skip_quantification = true
+    // Pipeline settings
     rescoring_engine    = 'percolator'
 }
diff --git a/conf/test_timstof.config b/conf/test_timstof.config
index 30d0b172..58632b3c 100644
--- a/conf/test_timstof.config
+++ b/conf/test_timstof.config
@@ -23,9 +23,6 @@ params {
     input = params.pipelines_testdata_base_path + 'mhcquant/testdata/sample_sheet_timstof.tsv'
     fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606.fasta'
 
-    // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
-    skip_quantification = true
-
     // Search settings
     activation_method        = 'CID'
     prec_charge              = '1:4'
diff --git a/docs/output.md b/docs/output.md
index 3018292a..f39d180f 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -21,9 +21,12 @@ MzTab files contain many columns and annotate the most important information - h
 PEP  sequence  accession  best_search_engine_score[1]  retention_time  charge  mass_to_charge  peptide_abundance_study_variable[1]
 ```
 
-Most important to know is that in this format we annotated the Comet XCorr of each peptide identification in the `best_search_engine_score[1]` column and peptide quantities in the `peptide_abundance_study_variable` columns. If `--skip_quantification` is specified the `best_search_engine_score[1]` holds the percolator q-value.
+By default (identification only), the `best_search_engine_score[1]` column holds the Percolator q-value. If `--quantify` is specified, the Comet XCorr of each peptide identification is annotated in the `best_search_engine_score[1]` column and peptide quantities in the `peptide_abundance_study_variable` columns.
 
-The TSV output file is an alternative output of [OpenMS](https://www.openms.de/) comprising similar information to the mzTab output. A brief explanation of the structure is listed below. See documentation of the format or PSI documentation for more information about [annotated scores and format](https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_TextExporter.html).
+The TSV output file is an alternative output of [OpenMS](https://www.openms.de/) comprising similar information to the mzTab output. The TSV output of identification runs is a simple tab-delimited file holding information about FDR-filtered peptides and currently all values produced by `MS²Rescore`. The TSV file in quantification mode (by using `--quantify`) is more complex and is described in more detail below.
+
+<details markdown="1">
+<summary>TSV Quant</summary>
 
 MAP contains information about the different mzML files that were provided initially
 
@@ -61,6 +64,10 @@ PEPTIDE contains information about peptide hits that were identified and corresp
 #PEPTIDE        rt      mz      score   rank    sequence        charge  aa_before       aa_after        score_type      search_identifier       accessions      FFId_category   fea
 ```
 
+See documentation of the format or PSI documentation for more information about [annotated scores and format](https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_TextExporter.html).
+
+</details>
+
 ### Intermediate results
 
 <details  markdown="1">
diff --git a/nextflow.config b/nextflow.config
index c6853513..348bd667 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -17,7 +17,7 @@ params {
     skip_decoy_generation           = false
     run_centroidisation             = false
     filter_mzml                     = false
-    skip_quantification             = true
+    quantify                        = false
     annotate_ions                   = false
 
     // Comet search parameters
diff --git a/nextflow_schema.json b/nextflow_schema.json
index c81e02b3..943c6b02 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -323,11 +323,11 @@
             "description": "",
             "default": "",
             "properties": {
-                "skip_quantification": {
+                "quantify": {
                     "type": "boolean",
-                    "default": true,
+                    "default": false,
                     "fa_icon": "fas fa-fast-forward",
-                    "description": "Skip quantification and only yield peptide identifications"
+                    "description": "Turn on quantification mode"
                 },
                 "max_rt_alignment_shift": {
                     "type": "integer",
diff --git a/subworkflows/local/quant.nf b/subworkflows/local/quant.nf
index a7dda954..fd7c90a3 100644
--- a/subworkflows/local/quant.nf
+++ b/subworkflows/local/quant.nf
@@ -1,5 +1,5 @@
 /*
- * Perform the quantification of the samples when the parameter --skip_quantification is not provided
+ * Perform the quantification of the samples when the parameter --quantify is provided
  * This workflow splits the merged percolator output into the individual runs and filters them based on the q-value
  * It then aligns the retention times of the runs and merges the idxml files together to use them as id_ext in featurefinder
  * Finally, it performs the quantification and emits the consensusXML file
diff --git a/workflows/mhcquant.nf b/workflows/mhcquant.nf
index 911f729e..018e22ae 100644
--- a/workflows/mhcquant.nf
+++ b/workflows/mhcquant.nf
@@ -142,7 +142,7 @@ workflow MHCQUANT {
     //
     // SUBWORKFLOW: QUANT
     //
-    if (!params.skip_quantification) {
+    if (params.quantify) {
         QUANT(merge_meta_map, ch_rescored_runs, ch_filter_q_value, ch_clean_mzml_file)
         ch_versions = ch_versions.mix(QUANT.out.versions)
         ch_output = QUANT.out.consensusxml

From 3caf8c7394af994d18f6cd775e6cabd920f8fcec Mon Sep 17 00:00:00 2001
From: Jonas Scheid <jonas.scheid@uni-tuebingen.de>
Date: Tue, 28 May 2024 12:46:31 +0000
Subject: [PATCH 10/12] fix prettier

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d0ffcecd..fdb4c6de 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Template update 2.13.1 [#313](https://github.com/nf-core/mhcquant/pull/313)
 - Template update 2.14.1 [#320](https://github.com/nf-core/mhcquant/pull/320)
 
+### `Changed`
+
+- Set identification mode as default and rename `--skip_quantification` to `--quantify` [#323](https://github.com/nf-core/mhcquant/pull/323)
+
 ### `Deprecated`
 
 - Removed MS²PIP and DeepLC modules. These feature generators are now called via the MS²Rescore framework [#293](https://github.com/nf-core/mhcquant/pull/293)

From 586d4739c258975ded4d63fd0a8c4f4c5d5a6bc3 Mon Sep 17 00:00:00 2001
From: Jonas Scheid <jonas.scheid@uni-tuebingen.de>
Date: Tue, 28 May 2024 20:48:28 +0000
Subject: [PATCH 11/12] set all extensions to lower case for simple extension
 detection

---
 subworkflows/local/prepare_spectra.nf                     | 2 +-
 subworkflows/local/utils_nfcore_mhcquant_pipeline/main.nf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/prepare_spectra.nf b/subworkflows/local/prepare_spectra.nf
index 0e7bd14f..9acacd6d 100644
--- a/subworkflows/local/prepare_spectra.nf
+++ b/subworkflows/local/prepare_spectra.nf
@@ -23,7 +23,7 @@ workflow PREPARE_SPECTRA {
                     return [ meta, filename ]
                 mzml : meta.ext == 'mzml'
                     return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
-                mzml_gz : meta.ext == 'mzML.gz'
+                mzml_gz : meta.ext == 'mzml.gz'
                     return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
                 d : meta.ext == 'd'
                     return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ]
diff --git a/subworkflows/local/utils_nfcore_mhcquant_pipeline/main.nf b/subworkflows/local/utils_nfcore_mhcquant_pipeline/main.nf
index dd5801b6..cc178b95 100644
--- a/subworkflows/local/utils_nfcore_mhcquant_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_mhcquant_pipeline/main.nf
@@ -164,7 +164,7 @@ def validateInputSamplesheet(input) {
 def getCustomExtension(file) {
     def name = file.getName()
     if (name =~ /.*\.(d\.tar\.gz|d\.tar|d\.zip|mzML\.gz|raw|RAW|mzML|d)$/) {
-        return name.split("\\.").drop(1).join(".")
+        return name.split("\\.").drop(1).join(".").toLowerCase()
     } else {
         return file.getExtension().toLowerCase()
     }

From 7a6e0f675b5e7d08a6ee565cd20ffc8b81114f9b Mon Sep 17 00:00:00 2001
From: Jonas Scheid <jonas.scheid@uni-tuebingen.de>
Date: Tue, 28 May 2024 20:53:22 +0000
Subject: [PATCH 12/12] rename timstof tests

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 87c109d6..43ecd6b6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -78,7 +78,7 @@ jobs:
 
   # Define a second workflow only against main with additional tests: test_timstof and test_full
   profile_main:
-    name: Run profile tests and additional full tests
+    name: Run timsTOF profile tests
     if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'nf-core/mhcquant' }}
     runs-on: ubuntu-latest
     env: