From 5e8cf1006c6055372a720464f0d81fefeb5516d1 Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Mon, 8 Apr 2024 14:59:22 +0200 Subject: [PATCH 01/16] add db_type feature to the database sheet --- .github/workflows/ci.yml | 6 ++++-- assets/schema_database.json | 7 ++++++- conf/test.config | 2 +- conf/test_adapterremoval.config | 2 +- conf/test_bbduk.config | 2 +- conf/test_falco.config | 2 +- conf/test_fastp.config | 2 +- conf/test_full.config | 2 +- conf/test_krakenuniq.config | 2 +- conf/test_malt.config | 2 +- conf/test_nopreprocessing.config | 2 +- conf/test_noprofiling.config | 2 +- conf/test_nothing.config | 2 +- conf/test_prinseqplusplus.config | 2 +- subworkflows/local/profiling.nf | 8 ++++++++ 15 files changed, 30 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e6d5d4df..a5eb375f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,8 +65,10 @@ jobs: if [[ "${{ matrix.tags }}" == "test_motus" ]]; then wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py python downloadDB.py --no-download-progress - echo 'tool,db_name,db_params,db_path' > 'database_motus.csv' - echo "motus,db_mOTU,,db_mOTU" >> 'database_motus.csv' + echo 'tool,db_name,db_params,db_type,db_path' > 'database_motus.csv' + echo "motus,db1_mOTU,,short,db_mOTU" >> 'database_motus.csv' + echo "motus,db2_mOTU,prep_long,long,db_mOTU" >> 'database_motus.csv' + echo "motus,db2_mOTU,,both,db_mOTU" >> 'database_motus.csv' nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --databases ./database_motus.csv --outdir ./results_${{ matrix.tags }}; else nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --outdir ./results_${{ matrix.tags }}; diff --git a/assets/schema_database.json b/assets/schema_database.json index 1f52a25c..fcf52fb6 100644 --- a/assets/schema_database.json +++ b/assets/schema_database.json @@ -57,6 +57,11 @@ "errorMessage": "Invalid database db_params entry. No quotes allowed.", "meta": ["db_params"] }, + "db_type": { + "type": "string", + "enum": ["short", "long", "both"], + "meta": ["db_type"] + }, "db_path": { "type": "string", "exists": true, @@ -64,7 +69,7 @@ "errorMessage": "db_path should be either a file path or a directory."
} }, - "required": ["tool", "db_name", "db_path"], + "required": ["tool", "db_name", "db_type", "db_path"], "uniqueEntries": ["tool", "db_name"] } } diff --git a/conf/test.config b/conf/test.config index c11f27b6..d3f5969d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -21,7 +21,7 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config index c3422d02..5419a276 100644 --- a/conf/test_adapterremoval.config +++ b/conf/test_adapterremoval.config @@ -21,7 +21,7 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_tool = 'adapterremoval' diff --git a/conf/test_bbduk.config b/conf/test_bbduk.config index 623fe191..c73823c4 100644 --- a/conf/test_bbduk.config +++ b/conf/test_bbduk.config @@ -21,7 +21,7 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = true perform_longread_qc = true perform_shortread_complexityfilter = true diff --git a/conf/test_falco.config b/conf/test_falco.config index 3fb77c03..ff1e9ded 100644 --- a/conf/test_falco.config +++ b/conf/test_falco.config @@ -21,7 +21,7 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' preprocessing_qc_tool = 'falco' perform_shortread_qc = true perform_longread_qc = true diff --git a/conf/test_fastp.config b/conf/test_fastp.config index 3feeae7a..7a896e42 100644 --- a/conf/test_fastp.config +++ b/conf/test_fastp.config @@ -21,7 +21,7 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_tool = 'fastp' diff --git a/conf/test_full.config b/conf/test_full.config index 2a74a80b..8dfa432b 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -14,7 +14,7 @@ params { // Input data for full size test input = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/samplesheet_full.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_full_v1.1.csv' + databases = 
'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_full_v1.2.csv' // Genome references hostremoval_reference = 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/819/615/GCA_000819615.1_ViralProj14015/GCA_000819615.1_ViralProj14015_genomic.fna.gz' diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config index e93de158..61827b83 100644 --- a/conf/test_krakenuniq.config +++ b/conf/test_krakenuniq.config @@ -25,7 +25,7 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_krakenuniq.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_krakenuniq_v1.2.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true diff --git a/conf/test_malt.config b/conf/test_malt.config index 7e5f2df3..b5390972 100644 --- a/conf/test_malt.config +++ b/conf/test_malt.config @@ -25,7 +25,7 @@ params { // Input data input = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/samplesheet_malt.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = false perform_longread_qc = false perform_shortread_complexityfilter = false diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 004a49e8..bd4f68e5 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -21,7 +21,7 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = false perform_longread_qc = false perform_shortread_complexityfilter = false diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index 7cf2317d..9ad84acf 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -21,7 +21,7 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true diff --git a/conf/test_nothing.config b/conf/test_nothing.config index ed247ef4..577eb3d0 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -21,7 +21,7 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = false perform_longread_qc = false perform_shortread_complexityfilter = false diff --git a/conf/test_prinseqplusplus.config b/conf/test_prinseqplusplus.config index acc23aa8..b4beea92 100644 --- a/conf/test_prinseqplusplus.config +++ b/conf/test_prinseqplusplus.config @@ -21,7 +21,7 @@ params { // Input data input = 
'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = true perform_longread_qc = true perform_shortread_complexityfilter = true diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index e306f1de..2241d1eb 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -67,6 +67,14 @@ workflow PROFILING { [meta + [id: "${meta.id}${meta.single_end ? '_se' : '_pe'}"], reads] } .combine(databases) + .filter { it -> + def platform = it[0]['instrument_platform'] + def db_type = it[2]['db_type'] + def is_long_read = platform == 'OXFORD_NANOPORE' + def is_long_db = db_type == 'long' || db_type == 'both' + def is_short_db = db_type == 'short' || db_type == 'both' + (is_long_read && is_long_db) || (!is_long_read && is_short_db) + } .branch { centrifuge: it[2]['tool'] == 'centrifuge' diamond: it[2]['tool'] == 'diamond' From 226eb7b7b9a84ce576163e56091ac530e064583d Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Thu, 11 Apr 2024 09:41:26 +0200 Subject: [PATCH 02/16] fix the merge conflicts --- conf/test.config | 32 -------------------------------- conf/test_adapterremoval.config | 26 -------------------------- conf/test_fastp.config | 27 --------------------------- conf/test_noprofiling.config | 26 -------------------------- conf/test_nothing.config | 25 ------------------------- 5 files changed, 136 deletions(-) diff --git a/conf/test.config b/conf/test.config index fc96f44a..47e2c57a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,7 +20,6 @@ params { max_time = '6.h' // Input data -<<<<<<< HEAD input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = true @@ -49,37 +48,6 @@ params { kraken2_save_reads = true centrifuge_save_reads = true run_profile_standardisation = true -======= - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' - perform_shortread_qc = true - perform_longread_qc = true - shortread_qc_mergepairs = true - perform_shortread_redundancyestimation = true - perform_shortread_complexityfilter = true - perform_shortread_hostremoval = true - perform_longread_hostremoval = true - perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = true - run_kraken2 = true - run_bracken = true - run_malt = false - run_metaphlan = true - run_centrifuge = true - run_diamond = true - run_krakenuniq = true - run_motus = false - run_ganon = true - run_krona = true - run_kmcp = true - kmcp_mode = 0 - krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' - malt_save_reads = true - kraken2_save_reads = true - centrifuge_save_reads = true - run_profile_standardisation = true ->>>>>>> bouncy-basenji } process { diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config index bfddd117..d6582373 100644 --- a/conf/test_adapterremoval.config +++ 
b/conf/test_adapterremoval.config @@ -20,7 +20,6 @@ params { max_time = '6.h' // Input data -<<<<<<< HEAD input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = true @@ -43,31 +42,6 @@ params { run_ganon = false run_kmcp = false kmcp_mode = 0 -======= - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' - perform_shortread_qc = true - perform_longread_qc = true - shortread_qc_tool = 'adapterremoval' - perform_shortread_redundancyestimation = true - perform_shortread_complexityfilter = true - perform_shortread_hostremoval = true - perform_longread_hostremoval = true - perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = true - run_kraken2 = true - run_bracken = false - run_malt = false - run_metaphlan = false - run_centrifuge = false - run_diamond = false - run_krakenuniq = false - run_motus = false - run_ganon = false - run_kmcp = false - kmcp_mode = 0 ->>>>>>> bouncy-basenji } process { diff --git a/conf/test_fastp.config b/conf/test_fastp.config index 836bef09..57284db6 100644 --- a/conf/test_fastp.config +++ b/conf/test_fastp.config @@ -20,7 +20,6 @@ params { max_time = '6.h' // Input data -<<<<<<< HEAD input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = true @@ -44,32 +43,6 @@ params { run_ganon = false run_kmcp = false kmcp_mode = 0 -======= - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' - perform_shortread_qc = true - perform_longread_qc = true - shortread_qc_tool = 'fastp' - perform_shortread_redundancyestimation = true - perform_shortread_complexityfilter = true - shortread_complexityfilter_tool = 'fastp' - perform_shortread_hostremoval = true - perform_longread_hostremoval = true - perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = true - run_kraken2 = true - run_bracken = false - run_malt = false - run_metaphlan = false - run_centrifuge = false - run_diamond = false - run_krakenuniq = false - run_motus = false - run_ganon = false - run_kmcp = false - kmcp_mode = 0 ->>>>>>> bouncy-basenji } process { diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index 73667bd2..6c169408 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -20,7 +20,6 @@ params { max_time = '6.h' // Input data -<<<<<<< HEAD input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = true @@ -43,31 +42,6 @@ params { run_kmcp = false kmcp_mode = 0 run_ganon = false -======= - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 
'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' - perform_shortread_qc = true - perform_longread_qc = true - shortread_qc_mergepairs = true - perform_shortread_redundancyestimation = true - perform_shortread_complexityfilter = true - perform_shortread_hostremoval = true - perform_longread_hostremoval = true - perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = false - run_kraken2 = false - run_bracken = false - run_malt = false - run_metaphlan = false - run_centrifuge = false - run_diamond = false - run_krakenuniq = false - run_motus = false - run_kmcp = false - kmcp_mode = 0 - run_ganon = false ->>>>>>> bouncy-basenji } process { diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 91bdbaf7..93d126bf 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -20,7 +20,6 @@ params { max_time = '6.h' // Input data -<<<<<<< HEAD input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = false @@ -42,30 +41,6 @@ params { run_kmcp = false kmcp_mode = 0 run_ganon = false -======= - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' - perform_shortread_qc = false - perform_longread_qc = false - perform_shortread_complexityfilter = false - perform_shortread_redundancyestimation = false - perform_shortread_hostremoval = false - perform_longread_hostremoval = false - perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = false - run_kraken2 = false - run_bracken = false - run_malt = false - run_metaphlan = false - run_centrifuge = false - run_diamond = false - run_krakenuniq = false - run_motus = false - run_kmcp = false - kmcp_mode = 0 - run_ganon = false ->>>>>>> bouncy-basenji } process { From 921130810434f3d4f77f257efd281520abc10f08 Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Thu, 11 Apr 2024 09:54:25 +0200 Subject: [PATCH 03/16] Add missing information after fixing the merge conflicts. 
--- conf/test.config | 1 + conf/test_adapterremoval.config | 1 + conf/test_fastp.config | 1 + conf/test_nopreprocessing.config | 26 -------------------------- conf/test_noprofiling.config | 1 + conf/test_nothing.config | 1 + 6 files changed, 5 insertions(+), 26 deletions(-) diff --git a/conf/test.config b/conf/test.config index 47e2c57a..d6395c94 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,6 +25,7 @@ params { perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true + perform_shortread_redundancyestimation = true perform_shortread_complexityfilter = true perform_shortread_hostremoval = true perform_longread_hostremoval = true diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config index d6582373..be77ded0 100644 --- a/conf/test_adapterremoval.config +++ b/conf/test_adapterremoval.config @@ -25,6 +25,7 @@ params { perform_shortread_qc = true perform_longread_qc = true shortread_qc_tool = 'adapterremoval' + perform_shortread_redundancyestimation = true perform_shortread_complexityfilter = true perform_shortread_hostremoval = true perform_longread_hostremoval = true diff --git a/conf/test_fastp.config b/conf/test_fastp.config index 57284db6..ebd8f618 100644 --- a/conf/test_fastp.config +++ b/conf/test_fastp.config @@ -25,6 +25,7 @@ params { perform_shortread_qc = true perform_longread_qc = true shortread_qc_tool = 'fastp' + perform_shortread_redundancyestimation = true perform_shortread_complexityfilter = true shortread_complexityfilter_tool = 'fastp' perform_shortread_hostremoval = true diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 7f870617..441600b4 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -20,7 +20,6 @@ params { max_time = '6.h' // Input data -<<<<<<< HEAD input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = false @@ -43,31 +42,6 @@ params { kmcp_mode = 0 run_ganon = true run_krona = true -======= - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' - perform_shortread_qc = false - perform_longread_qc = false - perform_shortread_redundancyestimation = false - perform_shortread_complexityfilter = false - perform_shortread_hostremoval = false - perform_longread_hostremoval = false - perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = true - run_kraken2 = true - run_bracken = true - run_malt = false // too big with other profiles on GHA - run_metaphlan = true - run_centrifuge = true - run_diamond = true - run_krakenuniq = true - run_motus = false - run_kmcp = true - kmcp_mode = 0 - run_ganon = true - run_krona = true ->>>>>>> bouncy-basenji } process { diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index 6c169408..9380980f 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -25,6 +25,7 @@ params { perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true + perform_shortread_redundancyestimation = true perform_shortread_complexityfilter = true perform_shortread_hostremoval = true perform_longread_hostremoval = true diff --git 
a/conf/test_nothing.config b/conf/test_nothing.config index 93d126bf..d5a52c81 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -24,6 +24,7 @@ params { databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' perform_shortread_qc = false perform_longread_qc = false + perform_shortread_redundancyestimation = true perform_shortread_complexityfilter = false perform_shortread_hostremoval = false perform_longread_hostremoval = false From ba2a2ed3689345cac76b2de51353bce01c3392e3 Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Thu, 11 Apr 2024 10:08:01 +0200 Subject: [PATCH 04/16] format config files --- conf/test.config | 56 ++++++++++++++++----------------- conf/test_adapterremoval.config | 44 +++++++++++++------------- conf/test_fastp.config | 46 +++++++++++++-------------- conf/test_noprofiling.config | 44 +++++++++++++------------- conf/test_nothing.config | 42 ++++++++++++------------- 5 files changed, 116 insertions(+), 116 deletions(-) diff --git a/conf/test.config b/conf/test.config index d6395c94..1e59686a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,35 +20,35 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' - perform_shortread_qc = true - perform_longread_qc = true - shortread_qc_mergepairs = true + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' + perform_shortread_qc = true + perform_longread_qc = true + shortread_qc_mergepairs = true perform_shortread_redundancyestimation = true - perform_shortread_complexityfilter = true - perform_shortread_hostremoval = true - perform_longread_hostremoval = true - perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = true - run_kraken2 = true - run_bracken = true - run_malt = false - run_metaphlan = true - run_centrifuge = true - run_diamond = true - run_krakenuniq = true - run_motus = false - run_ganon = true - run_krona = true - run_kmcp = true - kmcp_mode = 0 - krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' - malt_save_reads = true - kraken2_save_reads = true - centrifuge_save_reads = true - run_profile_standardisation = true + perform_shortread_complexityfilter = true + perform_shortread_hostremoval = true + perform_longread_hostremoval = true + perform_runmerging = true + hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + run_kaiju = true + run_kraken2 = true + run_bracken = true + run_malt = false + run_metaphlan = true + run_centrifuge = true + run_diamond = true + run_krakenuniq = true + run_motus = false + run_ganon = true + run_krona = true + run_kmcp = true + kmcp_mode = 0 + krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' + malt_save_reads = true + kraken2_save_reads = true + centrifuge_save_reads = true + run_profile_standardisation = true } process { diff --git a/conf/test_adapterremoval.config 
b/conf/test_adapterremoval.config index be77ded0..73c5ae9f 100644 --- a/conf/test_adapterremoval.config +++ b/conf/test_adapterremoval.config @@ -20,29 +20,29 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' - perform_shortread_qc = true - perform_longread_qc = true - shortread_qc_tool = 'adapterremoval' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' + perform_shortread_qc = true + perform_longread_qc = true + shortread_qc_tool = 'adapterremoval' perform_shortread_redundancyestimation = true - perform_shortread_complexityfilter = true - perform_shortread_hostremoval = true - perform_longread_hostremoval = true - perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = true - run_kraken2 = true - run_bracken = false - run_malt = false - run_metaphlan = false - run_centrifuge = false - run_diamond = false - run_krakenuniq = false - run_motus = false - run_ganon = false - run_kmcp = false - kmcp_mode = 0 + perform_shortread_complexityfilter = true + perform_shortread_hostremoval = true + perform_longread_hostremoval = true + perform_runmerging = true + hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + run_kaiju = true + run_kraken2 = true + run_bracken = false + run_malt = false + run_metaphlan = false + run_centrifuge = false + run_diamond = false + run_krakenuniq = false + run_motus = false + run_ganon = false + run_kmcp = false + kmcp_mode = 0 } process { diff --git a/conf/test_fastp.config b/conf/test_fastp.config index ebd8f618..dcfbbfbf 100644 --- a/conf/test_fastp.config +++ b/conf/test_fastp.config @@ -20,30 +20,30 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' - perform_shortread_qc = true - perform_longread_qc = true - shortread_qc_tool = 'fastp' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' + perform_shortread_qc = true + perform_longread_qc = true + shortread_qc_tool = 'fastp' perform_shortread_redundancyestimation = true - perform_shortread_complexityfilter = true - shortread_complexityfilter_tool = 'fastp' - perform_shortread_hostremoval = true - perform_longread_hostremoval = true - perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = true - run_kraken2 = true - run_bracken = false - run_malt = false - run_metaphlan = false - run_centrifuge = false - run_diamond = false - run_krakenuniq = false - run_motus = false - run_ganon = false - run_kmcp = false - kmcp_mode = 0 + perform_shortread_complexityfilter = true + shortread_complexityfilter_tool = 'fastp' + perform_shortread_hostremoval = true + perform_longread_hostremoval = true + perform_runmerging = true + 
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + run_kaiju = true + run_kraken2 = true + run_bracken = false + run_malt = false + run_metaphlan = false + run_centrifuge = false + run_diamond = false + run_krakenuniq = false + run_motus = false + run_ganon = false + run_kmcp = false + kmcp_mode = 0 } process { diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index 9380980f..6b9182c3 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -20,29 +20,29 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' - perform_shortread_qc = true - perform_longread_qc = true - shortread_qc_mergepairs = true + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' + perform_shortread_qc = true + perform_longread_qc = true + shortread_qc_mergepairs = true perform_shortread_redundancyestimation = true - perform_shortread_complexityfilter = true - perform_shortread_hostremoval = true - perform_longread_hostremoval = true - perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = false - run_kraken2 = false - run_bracken = false - run_malt = false - run_metaphlan = false - run_centrifuge = false - run_diamond = false - run_krakenuniq = false - run_motus = false - run_kmcp = false - kmcp_mode = 0 - run_ganon = false + perform_shortread_complexityfilter = true + perform_shortread_hostremoval = true + perform_longread_hostremoval = true + perform_runmerging = true + hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + run_kaiju = false + run_kraken2 = false + run_bracken = false + run_malt = false + run_metaphlan = false + run_centrifuge = false + run_diamond = false + run_krakenuniq = false + run_motus = false + run_kmcp = false + kmcp_mode = 0 + run_ganon = false } process { diff --git a/conf/test_nothing.config b/conf/test_nothing.config index d5a52c81..bdb48364 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -20,28 +20,28 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' - perform_shortread_qc = false - perform_longread_qc = false + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' + perform_shortread_qc = false + perform_longread_qc = false perform_shortread_redundancyestimation = true - perform_shortread_complexityfilter = false - perform_shortread_hostremoval = false - perform_longread_hostremoval = false - perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = false - run_kraken2 = false - run_bracken = false - run_malt = false - run_metaphlan = false - 
run_centrifuge = false - run_diamond = false - run_krakenuniq = false - run_motus = false - run_kmcp = false - kmcp_mode = 0 - run_ganon = false + perform_shortread_complexityfilter = false + perform_shortread_hostremoval = false + perform_longread_hostremoval = false + perform_runmerging = false + hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + run_kaiju = false + run_kraken2 = false + run_bracken = false + run_malt = false + run_metaphlan = false + run_centrifuge = false + run_diamond = false + run_krakenuniq = false + run_motus = false + run_kmcp = false + kmcp_mode = 0 + run_ganon = false } process { From a5f7c785034798f6cfbc899a727f877c184a7f3c Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Thu, 11 Apr 2024 10:17:44 +0200 Subject: [PATCH 05/16] update CHANGELOG.md --- CHANGELOG.md | 1 + conf/test_nopreprocessing.config | 45 ++++++++++++++++---------------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea9134c4..4de8db60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#417](https://github.com/nf-core/taxprofiler/pull/417) - Added reference-free metagenome estimation with Nonpareil (added by @jfy133) +- [#466](https://github.com/nf-core/taxprofiler/pull/466) The new column `db_type` has been added to the database sheet to differentiate between long-read and short-read parameters in databases. ## v1.1.6dev - [unreleased] diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 441600b4..49d544e3 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -20,28 +20,29 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' - perform_shortread_qc = false - perform_longread_qc = false - perform_shortread_complexityfilter = false - perform_shortread_hostremoval = false - perform_longread_hostremoval = false - perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = true - run_kraken2 = true - run_bracken = true - run_malt = false // too big with other profiles on GHA - run_metaphlan = true - run_centrifuge = true - run_diamond = true - run_krakenuniq = true - run_motus = false - run_kmcp = true - kmcp_mode = 0 - run_ganon = true - run_krona = true + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv' + perform_shortread_qc = false + perform_longread_qc = false + perform_shortread_redundancyestimation = false + perform_shortread_complexityfilter = false + perform_shortread_hostremoval = false + perform_longread_hostremoval = false + perform_runmerging = false + hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + run_kaiju = true + run_kraken2 = true + run_bracken = true + run_malt = false // too big with other profiles on GHA + run_metaphlan = true + run_centrifuge = true + run_diamond = true + run_krakenuniq = true + run_motus = false + run_kmcp = 
true + kmcp_mode = 0 + run_ganon = true + run_krona = true } process { From b5c76f7a350ceac8dd4ca662c46e2330be985a95 Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Thu, 11 Apr 2024 13:10:03 +0200 Subject: [PATCH 06/16] Enable downloading the results of failed tests for debugging --- .github/workflows/ci.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a5eb375f..4f442bcb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -73,3 +73,12 @@ jobs: else nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --outdir ./results_${{ matrix.tags }}; fi + + - name: Upload results and logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: logs-${{ matrix.profile }} + path: | + ./results_${{ matrix.tags }} + overwrite: true From abe18e51f0595b2a2771e7e106943329cda50cc4 Mon Sep 17 00:00:00 2001 From: Lili Andersson-Li <64467552+LilyAnderssonLee@users.noreply.github.com> Date: Thu, 11 Apr 2024 13:53:40 +0200 Subject: [PATCH 07/16] Update ci.yml Update the mOTU database name in ci.yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4f442bcb..d6bf9264 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,7 +68,7 @@ jobs: echo 'tool,db_name,db_params,db_type,db_path' > 'database_motus.csv' echo "motus,db1_mOTU,,short,db_mOTU" >> 'database_motus.csv' echo "motus,db2_mOTU,prep_long,long,db_mOTU" >> 'database_motus.csv' - echo "motus,db2_mOTU,,both,db_mOTU" >> 'database_motus.csv' + echo "motus,db3_mOTU,,both,db_mOTU" >> 'database_motus.csv' nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --databases ./database_motus.csv --outdir ./results_${{ matrix.tags }}; else nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --outdir ./results_${{ matrix.tags }}; From 60087e7250ec6c13a9dbfbc49d6866d696ed5fa6 Mon Sep 17 00:00:00 2001 From: Lili Andersson-Li <64467552+LilyAnderssonLee@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:26:11 +0200 Subject: [PATCH 08/16] Update ci.yml --- .github/workflows/ci.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d6bf9264..fd0bed28 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -73,12 +73,3 @@ jobs: else nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --outdir ./results_${{ matrix.tags }}; fi - - - name: Upload results and logs on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: logs-${{ matrix.profile }} - path: | - ./results_${{ matrix.tags }} - overwrite: true From f8bd172937d8961eee4b79375f534c79dcc238ac Mon Sep 17 00:00:00 2001 From: Lili Andersson-Li <64467552+LilyAnderssonLee@users.noreply.github.com> Date: Wed, 17 Apr 2024 08:04:54 +0200 Subject: [PATCH 09/16] Update CHANGELOG.md Co-authored-by: James A. 
Fellows Yates --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4de8db60..f8ed19d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#417](https://github.com/nf-core/taxprofiler/pull/417) - Added reference-free metagenome estimation with Nonpareil (added by @jfy133) -- [#466](https://github.com/nf-core/taxprofiler/pull/466) The new column `db_type` has been added to the database sheet to differentiate between long-read and short-read parameters in databases. +- [#466](https://github.com/nf-core/taxprofiler/pull/466) - Input database sheets now require a `db_type` column to distinguish between short- and long-read databases ## v1.1.6dev - [unreleased] From 1f5d5eace0d152cc4a031c8789f8c36ffacda327 Mon Sep 17 00:00:00 2001 From: Lili Andersson-Li <64467552+LilyAnderssonLee@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:01:14 +0200 Subject: [PATCH 10/16] combine reads and db by db_type --- assets/schema_database.json | 23 ++----------- subworkflows/local/profiling.nf | 61 +++++++++++++++++++-------------- workflows/taxprofiler.nf | 9 +++-- 3 files changed, 44 insertions(+), 49 deletions(-) diff --git a/assets/schema_database.json b/assets/schema_database.json index fcf52fb6..e5611894 100644 --- a/assets/schema_database.json +++ b/assets/schema_database.json @@ -36,30 +36,13 @@ "db_params": { "type": "string", "pattern": "^[^\"']*$", - "anyOf": [ - { - "properties": { - "tool": { "const": "bracken" } - }, - "not": { - "pattern": ".*;" - }, - "errorMessage": "Invalid database db_params entry. Bracken requires a semi-colon for passing one or more parameters." - }, - { - "properties": { - "tool": { "const": "kmcp" } - }, - "pattern": ".*;$", - "errorMessage": "Invalid database `db_params` entry. KMCP only requires a semi-colon if passing arguments to KMCP profile, in cases of which the arguments should go after the semi-colon." - } - ], "errorMessage": "Invalid database db_params entry. No quotes allowed.", "meta": ["db_params"] }, "db_type": { "type": "string", - "enum": ["short", "long", "both"], + "enum": ["short", "long", "short,long"], + "default": ["short,long"], "meta": ["db_type"] }, "db_path": { @@ -69,7 +52,7 @@ "errorMessage": "db_path should be either a file path or a directory." } }, - "required": ["tool", "db_name", "db_type", "db_path"], + "required": ["tool", "db_name", "db_path"], "uniqueEntries": ["tool", "db_name"] } } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 2241d1eb..de80b9f6 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -61,33 +61,42 @@ workflow PROFILING { */ // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = reads - .map { - meta, reads -> - [meta + [id: "${meta.id}${meta.single_end ? 
'_se' : '_pe'}"], reads] - } - .combine(databases) - .filter { it -> - def platform = it[0]['instrument_platform'] - def db_type = it[2]['db_type'] - def is_long_read = platform == 'OXFORD_NANOPORE' - def is_long_db = db_type == 'long' || db_type == 'both' - def is_short_db = db_type == 'short' || db_type == 'both' - (is_long_read && is_long_db) || (!is_long_read && is_short_db) - } - .branch { - centrifuge: it[2]['tool'] == 'centrifuge' - diamond: it[2]['tool'] == 'diamond' - kaiju: it[2]['tool'] == 'kaiju' - kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' // to reuse the kraken module to produce the input data for bracken - krakenuniq: it[2]['tool'] == 'krakenuniq' - malt: it[2]['tool'] == 'malt' - metaphlan: it[2]['tool'] == 'metaphlan' - motus: it[2]['tool'] == 'motus' - kmcp: it[2]['tool'] == 'kmcp' - ganon: it[2]['tool'] == 'ganon' - unknown: true + ch_reads = reads + .map { meta, reads -> [ meta.type, meta.subMap( meta.keySet() - 'type' ), reads ] } + + ch_dbs = databases + .flatMap { db -> + def ( db_meta, db_path ) = db + def db_types = db_meta.db_type.replaceAll(/\[|\]/, '').split(',') //removes the square brackets and splits the string into a list ["short", "long"] + if ( db_types.size() > 1 ) { + return db_types.collect { it -> + def new_db_meta = db_meta.clone() + [new_db_meta,db_path] + } + } else { + return [ db ] } + } + .map{ meta, db -> [ meta.db_type, meta.subMap( meta.keySet() - 'db_type' ), db ] } + + ch_input_for_profiling = reads + .map { meta, reads -> [ meta.type, meta.subMap( meta.keySet() - 'type' ), reads ] } + .combine(ch_dbs, by: 0) + .map{ db_type, meta, reads, db_meta, db -> + [ meta, reads, db_meta, db ] } + .branch { meta, reads, db_meta, db -> + centrifuge: db_meta.tool == 'centrifuge' + diamond: db_meta.tool == 'diamond' + kaiju: db_meta.tool == 'kaiju' + kraken2: db_meta.tool == 'kraken2' || db_meta.tool == 'bracken' // to reuse the kraken module to produce the input data for bracken + krakenuniq: db_meta.tool == 'krakenuniq' + malt: db_meta.tool == 'malt' + metaphlan: db_meta.tool == 'metaphlan' + motus: db_meta.tool == 'motus' + kmcp: db_meta.tool == 'kmcp' + ganon: db_meta.tool == 'ganon' + unknown: true + } /* PREPARE PROFILER INPUT CHANNELS & RUN PROFILING diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 2399797f..fad397b8 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -135,13 +135,13 @@ workflow TAXPROFILER { } .branch { meta, run_accession, instrument_platform, fastq_1, fastq_2, fasta -> fastq: meta.single_end || fastq_2 - return [ meta, fastq_2 ? [ fastq_1, fastq_2 ] : [ fastq_1 ] ] + return [ meta + [ type: "short" ], fastq_2 ? 
[ fastq_1, fastq_2 ] : [ fastq_1 ] ] nanopore: instrument_platform == 'OXFORD_NANOPORE' meta.single_end = true - return [ meta, [ fastq_1 ] ] + return [ meta + [ type: "long" ], [ fastq_1 ] ] fasta: meta.is_fasta meta.single_end = true - return [ meta, [ fasta ] ] + return [ meta + [ type: "short" ], [ fasta ] ] } // Merge ch_input.fastq and ch_input.nanopore into a single channel @@ -150,6 +150,9 @@ workflow TAXPROFILER { // Validate and decompress databases ch_dbs_for_untar = databases .branch { db_meta, db_path -> + if ( !db_meta.db_type ) { + db_meta = db_meta + [ db_type: "short,long" ] + } untar: db_path.name.endsWith( ".tar.gz" ) skip: true } From 301119ac9e5d3eb3d9f1098761aad692236b2642 Mon Sep 17 00:00:00 2001 From: Lili Andersson-Li <64467552+LilyAnderssonLee@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:39:03 +0200 Subject: [PATCH 11/16] Update ci.yml correct the definition of db_type for both short and long reads --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 21b49ed3..3fd66013 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,8 +67,8 @@ jobs: python downloadDB.py --no-download-progress echo 'tool,db_name,db_params,db_type,db_path' > 'database_motus.csv' echo "motus,db1_mOTU,,short,db_mOTU" >> 'database_motus.csv' - echo "motus,db2_mOTU,prep_long,long,db_mOTU" >> 'database_motus.csv' - echo "motus,db3_mOTU,,both,db_mOTU" >> 'database_motus.csv' + echo "motus,db2_mOTU,,long,db_mOTU" >> 'database_motus.csv' + echo "motus,db3_mOTU,,\"short,long\",db_mOTU" >> 'database_motus.csv' nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --databases ./database_motus.csv --outdir ./results_${{ matrix.tags }}; else nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --outdir ./results_${{ matrix.tags }}; fi From 749cd28cb52f1a0b6c2041df3b5a7cc975c9aca0 Mon Sep 17 00:00:00 2001 From: Lili Andersson-Li <64467552+LilyAnderssonLee@users.noreply.github.com> Date: Fri, 14 Jun 2024 08:53:54 +0200 Subject: [PATCH 12/16] Update profiling.nf --- subworkflows/local/profiling.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 55520c82..8028ef69 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -69,6 +69,7 @@ workflow PROFILING { if ( db_types.size() > 1 ) { return db_types.collect { it -> def new_db_meta = db_meta.clone() + new_db_meta.db_type = it [new_db_meta,db_path] } } else { From 862937b8ddb2881115e1715f562aaa4b58d4f9ae Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Tue, 18 Jun 2024 13:41:22 +0200 Subject: [PATCH 13/16] separate combined db_type values (short;long) with a semicolon --- assets/schema_database.json | 4 ++-- subworkflows/local/profiling.nf | 2 +- workflows/taxprofiler.nf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/schema_database.json b/assets/schema_database.json index e5611894..be401809 100644 --- a/assets/schema_database.json +++ b/assets/schema_database.json @@ -41,8 +41,8 @@ }, "db_type": { "type": "string", - "enum": ["short", "long", "short,long"], - "default": ["short,long"], + "enum": ["short", "long", "short;long"], + "default": ["short;long"], "meta": ["db_type"] }, "db_path": { diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 8028ef69..b4f23c22 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -65,7
+65,7 @@ workflow PROFILING { ch_dbs = databases .flatMap { db -> def ( db_meta, db_path ) = db - def db_types = db_meta.db_type.replaceAll(/\[|\]/, '').split(',') //removes the square brackets and splits the string into a list ["short", "long"] + def db_types = db_meta.db_type.replaceAll(/\[|\]/, '').split(';') //removes the square brackets and splits the string into a list ["short", "long"] if ( db_types.size() > 1 ) { return db_types.collect { it -> def new_db_meta = db_meta.clone() diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 5a36c892..be051ff2 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -151,7 +151,7 @@ workflow TAXPROFILER { ch_dbs_for_untar = databases .branch { db_meta, db_path -> if ( !db_meta.db_type ) { - db_meta = db_meta + [ db_type: "short,long" ] + db_meta = db_meta + [ db_type: "short;long" ] } untar: db_path.name.endsWith( ".tar.gz" ) skip: true } From f8e1e4f38a11c7e5cc8b9a6abeefb78958a7fecf Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Tue, 18 Jun 2024 13:48:25 +0200 Subject: [PATCH 14/16] replace comma with semicolon in db_type --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3fd66013..c47e8cdc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,7 +68,7 @@ jobs: echo 'tool,db_name,db_params,db_type,db_path' > 'database_motus.csv' echo "motus,db1_mOTU,,short,db_mOTU" >> 'database_motus.csv' echo "motus,db2_mOTU,,long,db_mOTU" >> 'database_motus.csv' - echo "motus,db3_mOTU,,\"short,long\",db_mOTU" >> 'database_motus.csv' + echo "motus,db3_mOTU,,short;long,db_mOTU" >> 'database_motus.csv' nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --databases ./database_motus.csv --outdir ./results_${{ matrix.tags }}; else nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --outdir ./results_${{ matrix.tags }}; fi From 58ef246a90f1164d24e95494a7e4443282b3c554 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 20 Jun 2024 08:56:42 +0000 Subject: [PATCH 15/16] Make database splitting and merging more nextflow-y --- assets/schema_database.json | 2 +- subworkflows/local/profiling.nf | 43 +++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/assets/schema_database.json b/assets/schema_database.json index be401809..ec04e326 100644 --- a/assets/schema_database.json +++ b/assets/schema_database.json @@ -42,7 +42,7 @@ "db_type": { "type": "string", "enum": ["short", "long", "short;long"], - "default": ["short;long"], + "default": "short;long", "meta": ["db_type"] }, "db_path": { diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index b4f23c22..95a1b74f 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -60,29 +60,36 @@ workflow PROFILING { COMBINE READS WITH POSSIBLE DATABASES */ - // e.g.
output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - + // Split each database entry by its db_type, so the combined 'short;long' value (the default) yields one entry per read type ch_dbs = databases - .flatMap { db -> - def ( db_meta, db_path ) = db - def db_types = db_meta.db_type.replaceAll(/\[|\]/, '').split(';') //removes the square brackets and splits the string into a list ["short", "long"] - if ( db_types.size() > 1 ) { - return db_types.collect { it -> - def new_db_meta = db_meta.clone() - new_db_meta.db_type = it - [new_db_meta,db_path] - } - } else { - return [ db ] - } + .map{ + meta_db, db -> + [ [meta_db.db_type.split(";")].flatten(), meta_db, db] + } + .transpose(by: 0) + .map{ + type, meta_db, db -> + [[type: type], meta_db.subMap(meta_db.keySet() - 'db_type') + [type: type], db] } - .map{ meta, db -> [ meta.db_type, meta.subMap( meta.keySet() - 'db_type' ), db ] } + .dump(tag: 'databases') + + // Join short and long reads with their corresponding short/long database + // Note that databases without an explicit db_type keep the default `short;long` and so match both read types. + // E.g. if there are no 'long' reads, the 'long' database channel element generated above + // will have nothing to join to and will be discarded + // Final output: [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads - .map{ meta, reads -> [ meta.type, meta.subMap( meta.keySet() - 'type' ), reads ] } + .map{ + meta, reads -> + [[type: meta.type], meta, reads] + } .combine(ch_dbs, by: 0) - .map{ db_type, meta, reads, db_meta, db -> - [ meta, reads, db_meta, db ] } + .map{ + db_type, meta, reads, db_meta, db -> + [ meta, reads, db_meta, db ] + } + .dump(tag: 'input to profiling') .branch { meta, reads, db_meta, db -> centrifuge: db_meta.tool == 'centrifuge' diamond: db_meta.tool == 'diamond' kaiju: db_meta.tool == 'kaiju' kraken2: db_meta.tool == 'kraken2' || db_meta.tool == 'bracken' // to reuse the kraken module to produce the input data for bracken krakenuniq: db_meta.tool == 'krakenuniq' malt: db_meta.tool == 'malt' metaphlan: db_meta.tool == 'metaphlan' motus: db_meta.tool == 'motus' kmcp: db_meta.tool == 'kmcp' ganon: db_meta.tool == 'ganon' unknown: true } From 2534c088de68bb068eda75f8450b539b62bb3467 Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Tue, 25 Jun 2024 08:43:18 +0200 Subject: [PATCH 16/16] remove dump from profiling.nf --- subworkflows/local/profiling.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 95a1b74f..55ea8e47 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -71,7 +71,6 @@ workflow PROFILING { type, meta_db, db -> [[type: type], meta_db.subMap(meta_db.keySet() - 'db_type') + [type: type], db] } - .dump(tag: 'databases') // Join short and long reads with their corresponding short/long database @@ -89,7 +88,6 @@ [ meta, reads, db_meta, db ] } - .dump(tag: 'input to profiling') .branch { meta, reads, db_meta, db -> centrifuge: db_meta.tool == 'centrifuge' diamond: db_meta.tool == 'diamond'
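For reference, the channel logic that PATCH 15 and PATCH 16 converge on can be exercised in isolation. The following is a minimal, self-contained Nextflow sketch of the same split-and-join idea; it is illustrative only, and the database entries, metadata, read paths, and names in it are invented rather than taken from the pipeline:

workflow {

    // Two example databases: one long-read only, one left at the default
    // combined type (all names and paths here are made up)
    ch_databases = Channel.of(
        [ [tool: 'kraken2', db_name: 'db1', db_type: 'short;long'], '/path/to/db1' ],
        [ [tool: 'kaiju', db_name: 'db2', db_type: 'long'], '/path/to/db2' ]
    )

    // As in PATCH 15: split db_type on ';' and transpose, so 'short;long'
    // becomes two channel elements keyed [type: short] and [type: long]
    ch_dbs = ch_databases
        .map { meta_db, db -> [ [meta_db.db_type.split(";")].flatten(), meta_db, db ] }
        .transpose(by: 0)
        .map { type, meta_db, db ->
            [ [type: type], meta_db.subMap(meta_db.keySet() - 'db_type') + [type: type], db ]
        }

    // A single short-read sample; long-only databases will find no partner
    ch_reads = Channel.of(
        [ [id: 'sample1', type: 'short'], '/path/to/sample1.fastq.gz' ]
    )

    // combine(by: 0) acts as the join: read/database pairs whose type keys
    // do not match are simply never emitted
    ch_reads
        .map { meta, reads -> [ [type: meta.type], meta, reads ] }
        .combine(ch_dbs, by: 0)
        .map { key, meta, reads, db_meta, db -> [ meta, reads, db_meta, db ] }
        .view()
}

Compared with the PATCH 01 approach of building the full reads-by-databases cross product and then filtering on instrument_platform, keying both channels on a shared [type: ...] map means invalid pairings are never materialised, which is what the PATCH 15 subject means by "more nextflow-y".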