Commit

moved params out of workflows
toniher committed Oct 22, 2024
1 parent 9c74274 commit fa60d30
Showing 10 changed files with 80 additions and 53 deletions.
44 changes: 24 additions & 20 deletions main.nf
@@ -164,25 +164,10 @@ workflow {
.fromFilePairs(params.annotations, size: 1)
.ifEmpty { error "Cannot find any annotation matching: ${params.annotations}" }

extraexons = params.extraexons ?
Channel.fromFilePairs(params.extraexons, checkIfExists: true, size: 1)
.ifEmpty { error "Extra exons not found" } :
Channel.empty()

// We join channels. If no extraexons, then it's empty, so no problem
data_to_annotation_raw = genomes.join(annotations)
data_to_annotation = data_to_annotation_raw.join(extraexons, remainder: true)

evodists_ch = Channel.fromPath(params.evodists, checkIfExists: true).collect()
clusterfile_ch = Channel.fromPath(params.cluster, checkIfExists: true).collect()
if ( params.orthopairs ) {
orthopairs_ch = Channel.fromPath(params.orthopairs, checkIfExists: true).collect()
} else {
orthopairs_ch = Channel.fromPath("/path/to/NO_FILE").collect()
}

PREPARE(
evodists_ch,
params.evodists,
clusterfile_ch,
gtfs,
fastas,
@@ -191,8 +176,10 @@ workflow {
params.long_dist,
params.medium_dist,
params.short_dist,
data_to_annotation,
params.extraexons
genomes,
annotations,
params.extraexons,
params.alignmentnum
)

ALIGN(
@@ -202,10 +189,27 @@ workflow {
params.long_dist,
params.medium_dist,
params.short_dist,
params.alignmentnum,
params.prevaln
)

SCORE(
ALIGN.out.folder_jscores,
PREPARE.out.clusters_split_ch,
PREPARE.out.dist_ranges_ch,
params.bonafide_pairs,
params.long_dist,
params.medium_dist,
params.short_dist
)

SCORE(ALIGN.out.folder_jscores, PREPARE.out.clusters_split_ch, PREPARE.out.dist_ranges_ch, params.bonafide_pairs)
CLUSTER(SCORE.out.score_exon_hits_pairs, PREPARE.out.clusters_split_ch, clusterfile_ch, orthopairs_ch)
CLUSTER(
SCORE.out.score_exon_hits_pairs,
PREPARE.out.clusters_split_ch,
clusterfile_ch,
params.orthopairs,
params.orthogroupnum
)
}
}

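The PREPARE, ALIGN, SCORE and CLUSTER calls above now pass values such as params.evodists, params.alignmentnum and params.prevaln directly as arguments; on the subworkflow side these arrive through a take: block, so the subworkflows themselves no longer read params.*. A minimal sketch of that DSL2 convention, using a hypothetical GREET subworkflow whose names and parameters are illustrative only:

// main.nf (sketch): params.* are resolved once, at the call site
include { GREET } from './subworkflows/greet'

workflow {
    GREET(params.name, params.mode)
}

// subworkflows/greet.nf (sketch): the subworkflow never touches params.*
workflow GREET {
    take:
    name    // forwarded from params.name by the caller
    mode    // forwarded from params.mode by the caller

    main:
    Channel.of("Hello, ${name} (${mode} mode)").view()
}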
3 changes: 2 additions & 1 deletion modules/local/exorthist/align_pairs.nf
@@ -8,13 +8,14 @@ process PARSE_IPA_PROT_ALN {
val long_dist
val medium_dist
val short_dist
path prevaln

output:
tuple val("${sp1.name}-${sp2.name}"), path("${sp1.name}-${sp2.name}-*"), emit: aligned_subclusters_4_splitting
path "${sp1.name}-${sp2.name}_EXs_to_split_part_*.txt", emit: EXs_to_split

script:
def prev_alignments = params.prevaln ? params.prevaln : ""
def prev_alignments = prevaln.name != 'NO_FILE' ? "${prevaln}" : ''
def cls_parts = cls_part_file.name.split("_")
def dist_range_par

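The prevaln.name != 'NO_FILE' check above follows Nextflow's optional-input pattern (https://nextflow-io.github.io/patterns/optional-input/), also referenced in the subworkflows below: when no folder is supplied, a placeholder file named NO_FILE is staged and the process treats the input as absent. A minimal sketch of both sides of the pattern, with a hypothetical USE_OPTIONAL_DIR process standing in for PARSE_IPA_PROT_ALN:

workflow {
    // Stage the real folder if given, otherwise the NO_FILE placeholder.
    // "/path/to/NO_FILE" mirrors the literal used in this pipeline; the
    // official pattern ships an empty NO_FILE file with the project.
    prevaln_ch = params.prevaln
        ? Channel.fromPath(params.prevaln, type: 'dir', checkIfExists: true).collect()
        : Channel.fromPath("/path/to/NO_FILE").collect()

    USE_OPTIONAL_DIR(prevaln_ch)
}

// Hypothetical process: only builds the option when the input is not the placeholder
process USE_OPTIONAL_DIR {
    input:
    path prevaln

    output:
    stdout

    script:
    def prev_opt = prevaln.name != 'NO_FILE' ? "--prevaln ${prevaln}" : ''
    """
    echo "running ${prev_opt}"
    """
}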
9 changes: 6 additions & 3 deletions modules/local/exorthist/filter_matches.nf
@@ -7,6 +7,9 @@ process FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE {

input:
tuple val(comp_id), path(all_scores), val(dist_range)
val(long_dist)
val(medium_dist)
val(short_dist)

output:
path "*.tab", emit: filterscore_per_joining
@@ -18,13 +21,13 @@ process FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE {

switch(dist_range) {
case "long":
dist_range_par = params.long_dist.split(",")
dist_range_par = long_dist.split(",")
break
case "medium":
dist_range_par = params.medium_dist.split(",")
dist_range_par = medium_dist.split(",")
break
case "short":
dist_range_par = params.short_dist.split(",")
dist_range_par = short_dist.split(",")
break
}

5 changes: 3 additions & 2 deletions modules/local/exorthist/format_input.nf
@@ -2,6 +2,7 @@ process FORMAT_EX_CLUSTERS_INPUT {
input:
path score_exon_hits_pairs
path clusterfile
val(orthogroupnum)

output:
path "PART_*-cluster_input.tab", emit: cluster_parts
@@ -10,10 +11,10 @@
"""
if [[ "${clusterfile}" == *.gz ]]; then
zcat ${clusterfile} > cluster_file
D1_format_EX_clusters_input.pl cluster_file ${score_exon_hits_pairs} ${params.orthogroupnum}
D1_format_EX_clusters_input.pl cluster_file ${score_exon_hits_pairs} ${orthogroupnum}
rm cluster_file
else
D1_format_EX_clusters_input.pl ${clusterfile} ${score_exon_hits_pairs} ${params.orthogroupnum}
D1_format_EX_clusters_input.pl ${clusterfile} ${score_exon_hits_pairs} ${orthogroupnum}
fi
"""
}
3 changes: 2 additions & 1 deletion modules/local/exorthist/split_clusters_chunks.nf
@@ -4,6 +4,7 @@ process SPLIT_CLUSTERS_IN_CHUNKS {
input:
path cls_tab_files
tuple val(id_comb), path(idfolder_A), path(idfolder_B)
val(alignmentnum)

output:
tuple path(idfolder_A), path(idfolder_B), path("${idfolder_A}_${idfolder_B}/*.cls.tab-part_*"), emit: cls_files_2_align
@@ -15,7 +16,7 @@
--sp2 ${idfolder_B} \
--expath ./ \
--project_dir ./ \
--N_split ${params.alignmentnum} \
--N_split ${alignmentnum} \
--gene_cluster ${id_comb}.cls.tab
"""
}
3 changes: 2 additions & 1 deletion modules/local/exorthist/split_pairs.nf
@@ -2,14 +2,15 @@ process SPLIT_EX_PAIRS_TO_REALIGN {
label 'pandas'
input:
path '*'
val(alignmentnum)

output:
path '*EXs_to_realign_part_*', emit: EXs_to_realign_batches

script:
"""
for file in \$(ls *); do
B2_split_EX_pairs_to_realign.py -i \${file} -n ${params.alignmentnum}
B2_split_EX_pairs_to_realign.py -i \${file} -n ${alignmentnum}
done
"""
}
12 changes: 10 additions & 2 deletions subworkflows/local/exorthist/align.nf
@@ -14,17 +14,25 @@ workflow ALIGN {
long_dist
medium_dist
short_dist
alignmentnum
prevaln

main:
if (prevaln) {
prevaln_ch = Channel.fromPath(prevaln, type: 'dir', checkIfExists: true).collect()
} else {
prevaln_ch = Channel.fromPath("/path/to/NO_FILE").collect()
}

// the last argument is the protein similarity alignment.
// if a prevaln folder is provided, the protein alignments present in each species pair subfolder will not be repeated.
PARSE_IPA_PROT_ALN(blosumfile, alignment_input, long_dist, medium_dist, short_dist)
PARSE_IPA_PROT_ALN(blosumfile, alignment_input, long_dist, medium_dist, short_dist, prevaln_ch)

// Collapse EXs_to_split in batches of 500 files
EXs_to_split = PARSE_IPA_PROT_ALN.out.EXs_to_split
EXs_to_split_batches = EXs_to_split.toSortedList().flatten().buffer(size : 500, remainder: true)
// Split exons pairs to realign
SPLIT_EX_PAIRS_TO_REALIGN(EXs_to_split_batches)
SPLIT_EX_PAIRS_TO_REALIGN(EXs_to_split_batches, alignmentnum)
EXs_to_realign_batches = SPLIT_EX_PAIRS_TO_REALIGN.out.EXs_to_realign_batches
// Flatten the results from the previous batch run and combine with sp1 and sp2 information, using sp1-sp2 as key.
EXs_to_realign = EXs_to_realign_batches.flatten().map{[it.getName().toString().split("_")[0],it]}.groupTuple().join(clusters_split_ch).transpose()
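As the comment above notes, the EXs_to_split files are collapsed into batches of 500 before realignment: toSortedList().flatten() gives a deterministic ordering, and buffer(size: 500, remainder: true) groups the items into fixed-size chunks, with remainder: true emitting the final, smaller chunk instead of dropping it. A toy, self-contained sketch of the operator (values are illustrative only):

workflow {
    // Group items into batches of three; the last batch may be smaller
    Channel.of(1..7)
        .buffer(size: 3, remainder: true)
        .view()   // [1, 2, 3], [4, 5, 6], [7]
}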
10 changes: 8 additions & 2 deletions subworkflows/local/exorthist/cluster.nf
@@ -12,11 +12,17 @@ workflow CLUSTER {
score_exon_hits_pairs
clusters_split_ch
clusterfile_ch
orthopairs_ch
orthopairs
orthogroupnum

main:
if (orthopairs) {
orthopairs_ch = Channel.fromPath(orthopairs, checkIfExists: true).collect()
} else {
orthopairs_ch = Channel.fromPath("/path/to/NO_FILE").collect()
}

FORMAT_EX_CLUSTERS_INPUT(score_exon_hits_pairs, clusterfile_ch)
FORMAT_EX_CLUSTERS_INPUT(score_exon_hits_pairs, clusterfile_ch, orthogroupnum)

// Split the file of exon pairs
// Unclustered are the exons ending up in single-exon clusters
39 changes: 19 additions & 20 deletions subworkflows/local/exorthist/prepare.nf
@@ -8,7 +8,7 @@ include { SPLIT_CLUSTERS_BY_SPECIES_PAIRS } from "${LOCAL_MODULES}/split_cluster
workflow PREPARE {

take:
evodists_ch
evodists
clusterfile_ch
gtfs
fastas
@@ -17,24 +17,13 @@ workflow PREPARE {
long_dist
medium_dist
short_dist
data_to_annotation
genomes
annotations
extraexons
alignmentnum

main:

// Print contents of each channel
// gtfs.view { "GTF file: $it" }
// fastas.view { "FASTA file: $it" }
// gtfs_suffix.view { "GTF suffix: $it" }
// fastas_suffix.view { "FASTA suffix: $it" }
// data_to_annotation.view { "Data to annotation: $it" }

extraexons_ch = params.extraexons ?
Channel.fromFilePairs(params.extraexons, checkIfExists: true, size: 1)
.ifEmpty { error "Extra exons not found" } :
Channel.empty()


CHECK_INPUT(
evodists_ch,
clusterfile_ch,
@@ -47,11 +36,21 @@
short_dist
)

// Sic: https://nextflow-io.github.io/patterns/optional-input/
if ( extraexons ) {
GENERATE_ANNOTATIONS(data_to_annotation, extraexons_ch)
evodists_ch = Channel.fromPath(evodists, checkIfExists: true).collect()
extraexons_ch = extraexons ?
Channel.fromFilePairs(extraexons, checkIfExists: true, size: 1)
.ifEmpty { error "Extra exons not found" } :
Channel.empty()

// We join channels. If no extraexons, then it's empty, so no problem
data_to_annotation_raw = genomes.join(annotations)
data_to_annotation = data_to_annotation_raw.join(extraexons_ch, remainder: true)

if (extraexons) {
GENERATE_ANNOTATIONS(data_to_annotation, extraexons_ch)
} else {
GENERATE_ANNOTATIONS(data_to_annotation, Channel.fromPath("/path/to/NO_FILE").collect())
// Sic: https://nextflow-io.github.io/patterns/optional-input/
GENERATE_ANNOTATIONS(data_to_annotation, Channel.fromPath("/path/to/NO_FILE").collect())
}

clusters_split_ch = GENERATE_ANNOTATIONS.out.idfolders.toList().map{ [it, it].combinations().findAll{ a, b -> a[0] < b[0]} }
@@ -63,7 +62,7 @@

// Split clusters
cls_tab_files_ch = SPLIT_CLUSTERS_BY_SPECIES_PAIRS.out.cls_tab_files
SPLIT_CLUSTERS_IN_CHUNKS(cls_tab_files_ch.collect(), clusters_split_ch)
SPLIT_CLUSTERS_IN_CHUNKS(cls_tab_files_ch.collect(), clusters_split_ch, alignmentnum)

cls_files_2_align = SPLIT_CLUSTERS_IN_CHUNKS.out.cls_files_2_align
cls_files_2_align_t = cls_files_2_align.transpose().map{[it[0].getFileName().toString()+"-"+it[1].getFileName().toString(), it[0], it[1], it[2]]}
5 changes: 4 additions & 1 deletion subworkflows/local/exorthist/score.nf
@@ -12,6 +12,9 @@ workflow SCORE {
clusters_split_ch
dist_ranges_ch
bonafide_pairs
long_dist
medium_dist
short_dist

main:

@@ -20,7 +23,7 @@
SCORE_EX_MATCHES(data_to_score)
// Filter the best matches above score cutoffs by target gene.
all_scores_to_filt_ch = SCORE_EX_MATCHES.out.all_scores_to_filt
FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE(all_scores_to_filt_ch.join(dist_ranges_ch))
FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE(all_scores_to_filt_ch.join(dist_ranges_ch), long_dist, medium_dist, short_dist)
// Join filtered scored EX matches
filterscore_per_joining_ch = FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE.out.filterscore_per_joining
JOIN_FILTERED_EX_MATCHES(filterscore_per_joining_ch.collect())
