Commit

moved params out of workflows
toniher committed Oct 22, 2024
1 parent 9c74274 commit fa60d30
Showing 10 changed files with 80 additions and 53 deletions.
44 changes: 24 additions & 20 deletions main.nf
@@ -164,25 +164,10 @@ workflow {
.fromFilePairs(params.annotations, size: 1)
.ifEmpty { error "Cannot find any annotation matching: ${params.annotations}" }

extraexons = params.extraexons ?
Channel.fromFilePairs(params.extraexons, checkIfExists: true, size: 1)
.ifEmpty { error "Extra exons not found" } :
Channel.empty()

// We join channels. If no extraexons, then it's empty, so no problem
data_to_annotation_raw = genomes.join(annotations)
data_to_annotation = data_to_annotation_raw.join(extraexons, remainder: true)

evodists_ch = Channel.fromPath(params.evodists, checkIfExists: true).collect()
clusterfile_ch = Channel.fromPath(params.cluster, checkIfExists: true).collect()
if ( params.orthopairs ) {
orthopairs_ch = Channel.fromPath(params.orthopairs, checkIfExists: true).collect()
} else {
orthopairs_ch = Channel.fromPath("/path/to/NO_FILE").collect()
}

PREPARE(
evodists_ch,
params.evodists,
clusterfile_ch,
gtfs,
fastas,
@@ -191,8 +176,10 @@ workflow {
params.long_dist,
params.medium_dist,
params.short_dist,
data_to_annotation,
params.extraexons
genomes,
annotations,
params.extraexons,
params.alignmentnum
)

ALIGN(
@@ -202,10 +189,27 @@ workflow {
params.long_dist,
params.medium_dist,
params.short_dist,
params.alignmentnum,
params.prevaln
)

SCORE(
ALIGN.out.folder_jscores,
PREPARE.out.clusters_split_ch,
PREPARE.out.dist_ranges_ch,
params.bonafide_pairs,
params.long_dist,
params.medium_dist,
params.short_dist
)

SCORE(ALIGN.out.folder_jscores, PREPARE.out.clusters_split_ch, PREPARE.out.dist_ranges_ch, params.bonafide_pairs)
CLUSTER(SCORE.out.score_exon_hits_pairs, PREPARE.out.clusters_split_ch, clusterfile_ch, orthopairs_ch)
CLUSTER(
SCORE.out.score_exon_hits_pairs,
PREPARE.out.clusters_split_ch,
clusterfile_ch,
params.orthopairs,
params.orthogroupnum
)
}
}

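The PREPARE, ALIGN, SCORE and CLUSTER calls above now pass values such as params.evodists, params.alignmentnum and params.prevaln directly as arguments; on the subworkflow side these arrive through a take: block, so the subworkflows themselves no longer read params.*. A minimal sketch of that DSL2 convention, using a hypothetical GREET subworkflow whose names and parameters are illustrative only:

// main.nf (sketch): params.* are resolved once, at the call site
include { GREET } from './subworkflows/greet'

workflow {
    GREET(params.name, params.mode)
}

// subworkflows/greet.nf (sketch): the subworkflow never touches params.*
workflow GREET {
    take:
    name    // forwarded from params.name by the caller
    mode    // forwarded from params.mode by the caller

    main:
    Channel.of("Hello, ${name} (${mode} mode)").view()
}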
3 changes: 2 additions & 1 deletion modules/local/exorthist/align_pairs.nf
@@ -8,13 +8,14 @@ process PARSE_IPA_PROT_ALN {
val long_dist
val medium_dist
val short_dist
path prevaln

output:
tuple val("${sp1.name}-${sp2.name}"), path("${sp1.name}-${sp2.name}-*"), emit: aligned_subclusters_4_splitting
path "${sp1.name}-${sp2.name}_EXs_to_split_part_*.txt", emit: EXs_to_split

script:
def prev_alignments = params.prevaln ? params.prevaln : ""
def prev_alignments = prevaln.name != 'NO_FILE' ? "${prevaln}" : ''
def cls_parts = cls_part_file.name.split("_")
def dist_range_par

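The prevaln.name != 'NO_FILE' check above follows Nextflow's optional-input pattern (https://nextflow-io.github.io/patterns/optional-input/), also referenced in the subworkflows below: when no folder is supplied, a placeholder file named NO_FILE is staged and the process treats the input as absent. A minimal sketch of both sides of the pattern, with a hypothetical USE_OPTIONAL_DIR process standing in for PARSE_IPA_PROT_ALN:

workflow {
    // Stage the real folder if given, otherwise the NO_FILE placeholder.
    // "/path/to/NO_FILE" mirrors the literal used in this pipeline; the
    // official pattern ships an empty NO_FILE file with the project.
    prevaln_ch = params.prevaln
        ? Channel.fromPath(params.prevaln, type: 'dir', checkIfExists: true).collect()
        : Channel.fromPath("/path/to/NO_FILE").collect()

    USE_OPTIONAL_DIR(prevaln_ch)
}

// Hypothetical process: only builds the option when the input is not the placeholder
process USE_OPTIONAL_DIR {
    input:
    path prevaln

    output:
    stdout

    script:
    def prev_opt = prevaln.name != 'NO_FILE' ? "--prevaln ${prevaln}" : ''
    """
    echo "running ${prev_opt}"
    """
}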
9 changes: 6 additions & 3 deletions modules/local/exorthist/filter_matches.nf
@@ -7,6 +7,9 @@ process FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE {

input:
tuple val(comp_id), path(all_scores), val(dist_range)
val(long_dist)
val(medium_dist)
val(short_dist)

output:
path "*.tab", emit: filterscore_per_joining
@@ -18,13 +21,13 @@ process FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE {

switch(dist_range) {
case "long":
dist_range_par = params.long_dist.split(",")
dist_range_par = long_dist.split(",")
break
case "medium":
dist_range_par = params.medium_dist.split(",")
dist_range_par = medium_dist.split(",")
break
case "short":
dist_range_par = params.short_dist.split(",")
dist_range_par = short_dist.split(",")
break
}

5 changes: 3 additions & 2 deletions modules/local/exorthist/format_input.nf
@@ -2,6 +2,7 @@ process FORMAT_EX_CLUSTERS_INPUT {
input:
path score_exon_hits_pairs
path clusterfile
val(orthogroupnum)

output:
path "PART_*-cluster_input.tab", emit: cluster_parts
@@ -10,10 +11,10 @@
"""
if [[ "${clusterfile}" == *.gz ]]; then
zcat ${clusterfile} > cluster_file
D1_format_EX_clusters_input.pl cluster_file ${score_exon_hits_pairs} ${params.orthogroupnum}
D1_format_EX_clusters_input.pl cluster_file ${score_exon_hits_pairs} ${orthogroupnum}
rm cluster_file
else
D1_format_EX_clusters_input.pl ${clusterfile} ${score_exon_hits_pairs} ${params.orthogroupnum}
D1_format_EX_clusters_input.pl ${clusterfile} ${score_exon_hits_pairs} ${orthogroupnum}
fi
"""
}
3 changes: 2 additions & 1 deletion modules/local/exorthist/split_clusters_chunks.nf
@@ -4,6 +4,7 @@ process SPLIT_CLUSTERS_IN_CHUNKS {
input:
path cls_tab_files
tuple val(id_comb), path(idfolder_A), path(idfolder_B)
val(alignmentnum)

output:
tuple path(idfolder_A), path(idfolder_B), path("${idfolder_A}_${idfolder_B}/*.cls.tab-part_*"), emit: cls_files_2_align
@@ -15,7 +16,7 @@
--sp2 ${idfolder_B} \
--expath ./ \
--project_dir ./ \
--N_split ${params.alignmentnum} \
--N_split ${alignmentnum} \
--gene_cluster ${id_comb}.cls.tab
"""
}
3 changes: 2 additions & 1 deletion modules/local/exorthist/split_pairs.nf
@@ -2,14 +2,15 @@ process SPLIT_EX_PAIRS_TO_REALIGN {
label 'pandas'
input:
path '*'
val(alignmentnum)

output:
path '*EXs_to_realign_part_*', emit: EXs_to_realign_batches

script:
"""
for file in \$(ls *); do
B2_split_EX_pairs_to_realign.py -i \${file} -n ${params.alignmentnum}
B2_split_EX_pairs_to_realign.py -i \${file} -n ${alignmentnum}
done
"""
}
12 changes: 10 additions & 2 deletions subworkflows/local/exorthist/align.nf
@@ -14,17 +14,25 @@ workflow ALIGN {
long_dist
medium_dist
short_dist
alignmentnum
prevaln

main:
if (prevaln) {
prevaln_ch = Channel.fromPath(prevaln, type: 'dir', checkIfExists: true).collect()
} else {
prevaln_ch = Channel.fromPath("/path/to/NO_FILE").collect()
}

// the last argument is the protein similarity alignment.
// if a prevaln folder is provided, the protein alignments present in each species pair subfolder will not be repeated.
PARSE_IPA_PROT_ALN(blosumfile, alignment_input, long_dist, medium_dist, short_dist)
PARSE_IPA_PROT_ALN(blosumfile, alignment_input, long_dist, medium_dist, short_dist, prevaln_ch)

// Collapse EXs_to_split in batches of 500 files
EXs_to_split = PARSE_IPA_PROT_ALN.out.EXs_to_split
EXs_to_split_batches = EXs_to_split.toSortedList().flatten().buffer(size : 500, remainder: true)
// Split exons pairs to realign
SPLIT_EX_PAIRS_TO_REALIGN(EXs_to_split_batches)
SPLIT_EX_PAIRS_TO_REALIGN(EXs_to_split_batches, alignmentnum)
EXs_to_realign_batches = SPLIT_EX_PAIRS_TO_REALIGN.out.EXs_to_realign_batches
// Flatten the results from the previous batch run and combine with sp1 and sp2 information, using sp1-sp2 as key.
EXs_to_realign = EXs_to_realign_batches.flatten().map{[it.getName().toString().split("_")[0],it]}.groupTuple().join(clusters_split_ch).transpose()
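As the comment above notes, the EXs_to_split files are collapsed into batches of 500 before realignment: toSortedList().flatten() gives a deterministic ordering, and buffer(size: 500, remainder: true) groups the items into fixed-size chunks, with remainder: true emitting the final, smaller chunk instead of dropping it. A toy, self-contained sketch of the operator (values are illustrative only):

workflow {
    // Group items into batches of three; the last batch may be smaller
    Channel.of(1..7)
        .buffer(size: 3, remainder: true)
        .view()   // [1, 2, 3], [4, 5, 6], [7]
}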
10 changes: 8 additions & 2 deletions subworkflows/local/exorthist/cluster.nf
@@ -12,11 +12,17 @@ workflow CLUSTER {
score_exon_hits_pairs
clusters_split_ch
clusterfile_ch
orthopairs_ch
orthopairs
orthogroupnum

main:
if (orthopairs) {
orthopairs_ch = Channel.fromPath(orthopairs, checkIfExists: true).collect()
} else {
orthopairs_ch = Channel.fromPath("/path/to/NO_FILE").collect()
}

FORMAT_EX_CLUSTERS_INPUT(score_exon_hits_pairs, clusterfile_ch)
FORMAT_EX_CLUSTERS_INPUT(score_exon_hits_pairs, clusterfile_ch, orthogroupnum)

// Split the file of exon pairs
// Unclustered are the exons ending up in single-exon clusters
39 changes: 19 additions & 20 deletions subworkflows/local/exorthist/prepare.nf
@@ -8,7 +8,7 @@ include { SPLIT_CLUSTERS_BY_SPECIES_PAIRS } from "${LOCAL_MODULES}/split_cluster
workflow PREPARE {

take:
evodists_ch
evodists
clusterfile_ch
gtfs
fastas
@@ -17,24 +17,13 @@ workflow PREPARE {
long_dist
medium_dist
short_dist
data_to_annotation
genomes
annotations
extraexons
alignmentnum

main:

// Print contents of each channel
// gtfs.view { "GTF file: $it" }
// fastas.view { "FASTA file: $it" }
// gtfs_suffix.view { "GTF suffix: $it" }
// fastas_suffix.view { "FASTA suffix: $it" }
// data_to_annotation.view { "Data to annotation: $it" }

extraexons_ch = params.extraexons ?
Channel.fromFilePairs(params.extraexons, checkIfExists: true, size: 1)
.ifEmpty { error "Extra exons not found" } :
Channel.empty()


CHECK_INPUT(
evodists_ch,
clusterfile_ch,
@@ -47,11 +36,21 @@
short_dist
)

// Sic: https://nextflow-io.github.io/patterns/optional-input/
if ( extraexons ) {
GENERATE_ANNOTATIONS(data_to_annotation, extraexons_ch)
evodists_ch = Channel.fromPath(evodists, checkIfExists: true).collect()
extraexons_ch = extraexons ?
Channel.fromFilePairs(extraexons, checkIfExists: true, size: 1)
.ifEmpty { error "Extra exons not found" } :
Channel.empty()

// We join channels. If no extraexons, then it's empty, so no problem
data_to_annotation_raw = genomes.join(annotations)
data_to_annotation = data_to_annotation_raw.join(extraexons_ch, remainder: true)

if (extraexons) {
GENERATE_ANNOTATIONS(data_to_annotation, extraexons_ch)
} else {
GENERATE_ANNOTATIONS(data_to_annotation, Channel.fromPath("/path/to/NO_FILE").collect())
// Sic: https://nextflow-io.github.io/patterns/optional-input/
GENERATE_ANNOTATIONS(data_to_annotation, Channel.fromPath("/path/to/NO_FILE").collect())
}

clusters_split_ch = GENERATE_ANNOTATIONS.out.idfolders.toList().map{ [it, it].combinations().findAll{ a, b -> a[0] < b[0]} }
@@ -63,7 +62,7 @@

// Split clusters
cls_tab_files_ch = SPLIT_CLUSTERS_BY_SPECIES_PAIRS.out.cls_tab_files
SPLIT_CLUSTERS_IN_CHUNKS(cls_tab_files_ch.collect(), clusters_split_ch)
SPLIT_CLUSTERS_IN_CHUNKS(cls_tab_files_ch.collect(), clusters_split_ch, alignmentnum)

cls_files_2_align = SPLIT_CLUSTERS_IN_CHUNKS.out.cls_files_2_align
cls_files_2_align_t = cls_files_2_align.transpose().map{[it[0].getFileName().toString()+"-"+it[1].getFileName().toString(), it[0], it[1], it[2]]}
5 changes: 4 additions & 1 deletion subworkflows/local/exorthist/score.nf
@@ -12,6 +12,9 @@ workflow SCORE {
clusters_split_ch
dist_ranges_ch
bonafide_pairs
long_dist
medium_dist
short_dist

main:

@@ -20,7 +23,7 @@
SCORE_EX_MATCHES(data_to_score)
// Filter the best matches above score cutoffs by target gene.
all_scores_to_filt_ch = SCORE_EX_MATCHES.out.all_scores_to_filt
FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE(all_scores_to_filt_ch.join(dist_ranges_ch))
FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE(all_scores_to_filt_ch.join(dist_ranges_ch), long_dist, medium_dist, short_dist)
// Join filtered scored EX matches
filterscore_per_joining_ch = FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE.out.filterscore_per_joining
JOIN_FILTERED_EX_MATCHES(filterscore_per_joining_ch.collect())
