Skip to content

Commit

Permalink
more cleaning
Browse files Browse the repository at this point in the history
  • Loading branch information
toniher committed Oct 24, 2024
1 parent 67e8367 commit 8c52857
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 97 deletions.
87 changes: 13 additions & 74 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
#!/usr/bin/env nextflow


/*
* Copyright (c) 2019-2024, Centre for Genomic Regulation (CRG)
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/


/*
===========================================================
ExOrthist pipeline for Bioinformatics Core @ CRG
Expand All @@ -23,60 +18,6 @@ ExOrthist pipeline for Bioinformatics Core @ CRG

nextflow.enable.dsl=2


log_main = """
╔╦╗┬ ┬┌─┐ ╔═╗─┐ ┬╔═╗┬─┐┌┬┐┬ ┬┬┌─┐┌┬┐
║ ├─┤├┤ ║╣ ┌┴┬┘║ ║├┬┘ │ ├─┤│└─┐ │
╩ ┴ ┴└─┘ ╚═╝┴ └─╚═╝┴└─ ┴ ┴ ┴┴└─┘ ┴
==============================================================================
annotations (GTF files) : ${params.annotations}
genomes (fasta files) : ${params.genomes}
cluster file (txt files) : ${params.cluster}
pairwise evo distances : ${params.evodists}
long distance parameters : ${params.long_dist}
medium distance parameters : ${params.medium_dist}
short distance parameters : ${params.short_dist}
pre-computed alignments : ${params.prevaln}
alignment number : ${params.alignmentnum}
orthogroup number : ${params.orthogroupnum}
extraexons (e.g. from VastDB) : ${params.extraexons}
bona fide orthologous exon pairs : ${params.bonafide_pairs}
orthopairs : ${params.orthopairs}
output (output folder) : ${params.output}
email for notification : ${params.email}
hook_url : ${params.hook_url}
INFORMATION ABOUT OPTIONS:
The long, medium, short distance cut-offs are in the format: "int_num,ex_seq,ex_len,prot_sim".
Only exon matches respecting all cut-offs are considered homologous.
- int_num (0,1,2): Number of surrounding intron positions required to be conserved.
- ex_seq (from 0 to 1): Minimum sequence similarity % between a
pair of homologous exons and their corresponding upstream and
downstream exons.
- ex_len (from 0 to 1): Maximum size difference between two homologous exons
(as a fraction of either exon).
- prot_sim (from 0 to 1): Minimum sequence similarity over the entire pairwise alignment
for a pair of protein isoforms to be considered for comparison.
See online README at https://github.com/biocorecrg/ExOrthist for further information about the options.
"""

log_plot = """
Executing with the following parameters:
output main : ${params.output}
output plot : ${params.output_plot}
geneID : ${params.geneID}
isoformID : ${params.isoformID}
relevant exons : ${params.relevant_exs}
reclustered gene orthology file : ${params.sub_orthologs}
email for notification : ${params.email}
hook_url : ${params.hook_url}
"""

include { paramsHelp; validateParameters } from 'plugin/nf-schema'

// if( !workflow.resume ) {
// println "Removing the output folder"
// new File("${params.output}").delete()
Expand All @@ -87,19 +28,23 @@ WORKFLOWS='./workflows/'

include { ALIGN } from "${LOCAL_SUBWORKFLOWS}/align.nf"
include { CLUSTER } from "${LOCAL_SUBWORKFLOWS}/cluster.nf"

include { PIPELINE_COMPLETION; PIPELINE_INITIALISATION } from "${LOCAL_SUBWORKFLOWS}/util.nf"

include { PREPARE } from "${LOCAL_SUBWORKFLOWS}/prepare.nf"
include { SCORE } from "${LOCAL_SUBWORKFLOWS}/score.nf"

include { PLOT } from "${WORKFLOWS}/plot.nf"

include { final_message; notify_slack } from "./lib/functions.nf"

workflow {

validateParameters()
PIPELINE_INITIALISATION(
params,
args
)

if (params.wf == "plot" ) {
log.info(log_plot)

PLOT(
params.output,
params.geneID,
Expand All @@ -110,7 +55,6 @@ workflow {
)

} else {
log.info(log_main)

PREPARE(
params.evodists,
Expand Down Expand Up @@ -154,15 +98,10 @@ workflow {
)
}

}
PIPELINE_COMPLETION(
params.wf,
params.email,
params.hook_url
)

// Completion handler: prints the final summary and dispatches the optional
// e-mail / webhook notifications.
workflow.onComplete {
    // Summary text produced by final_message(), included from ./lib/functions.nf.
    def text = final_message("ExOrthist", params.wf)
    println text
    if (params.email) {
        // Send the summary built above as the mail body. The previous code
        // passed `msg`, which is never defined in this scope.
        sendMail(to: params.email, subject: "[ExOrthist] Execution finished", body: text)
    }
    if (params.hook_url) {
        // Post the same summary text to the configured hook URL.
        notify_slack(text, params.hook_url)
    }
}
17 changes: 2 additions & 15 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,11 @@ validation {
║ ├─┤├┤ ║╣ ┌┴┬┘║ ║├┬┘ │ ├─┤│└─┐ │
╩ ┴ ┴└─┘ ╚═╝┴ └─╚═╝┴└─ ┴ ┴ ┴┴└─┘ ┴
==============================================================================
INFORMATION ABOUT OPTIONS:
The long, medium, short distance cut-offs are in the format: "int_num,ex_seq,ex_len,prot_sim".
Only exon matches respecting all cut-offs are considered homologous.
- int_num (0,1,2): Number of surrounding intron positions required to be conserved.
- ex_seq (from 0 to 1): Minimum sequence similarity % between a
pair of homologous exons and their corresponding upstream and
downstream exons.
- ex_len (from 0 to 1): Maximum size difference between two homologous exons
(as a fraction of either exon).
- prot_sim (from 0 to 1): Minimum sequence similarity over the entire pairwise alignment
for a pair of protein isoforms to be considered for comparison.
See online README at https://github.com/biocorecrg/ExOrthist for further information about the options.
See online README at https://github.com/biocorecrg/ExOrthist for further information.
"""
afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""}
* Software dependencies
https://github.com/${manifest.name}/blob/master/CITATIONS.md
https://github.com/${manifest.name}/blob/master/CITATIONS.md
"""
summary {
beforeText = validation.help.beforeText
Expand Down
17 changes: 12 additions & 5 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@
"type": "string",
"description": "Workflow to be run, by default the main one. Alternative is plot",
"default": "main"
},
"validate_params": {
"type": "boolean",
"description": "Boolean whether to validate parameters against the schema at runtime",
"default": true,
"fa_icon": "fas fa-check-square",
"hidden": true
}
}
},
Expand Down Expand Up @@ -85,11 +92,6 @@
"description": "Short distance parameters",
"default": "2,0.50,0.60,0.25"
},
"prevaln": {
"type": "string",
"description": "Pre-computed alignments",
"format": "directory-path"
},
"alignmentnum": {
"type": "integer",
"description": "Alignment number",
Expand Down Expand Up @@ -120,6 +122,11 @@
"type": "string",
"description": "Orthopairs",
"format": "file-path"
},
"prevaln": {
"type": "string",
"description": "Pre-computed alignments",
"format": "directory-path"
}
}
},
Expand Down
10 changes: 7 additions & 3 deletions params.config.test
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
params {
// Generic options
wf = "main"
// Schema validation default options
validate_params = true
// Notification options
hook_url = null
email = null
Expand All @@ -10,14 +12,16 @@ params {
annotations = "$projectDir/test/GTF/*_annot-subsetted.gtf.gz"
alignmentnum = 1000
orthogroupnum = 500
extraexons = null
bonafide_pairs = null
orthopairs = null
evodists = "$projectDir/test/evodists.txt"
long_dist = "2,0.10,0.40,0.15"
medium_dist = "2,0.30,0.60,0.20"
short_dist = "2,0.50,0.60,0.25"
// Additional data
extraexons = null
bonafide_pairs = null
orthopairs = null
prevaln = null
// Output folder
output = "$projectDir/output_test"
// Plot pipeline specific options
geneID = "ENSG00000159055"
Expand Down

0 comments on commit 8c52857

Please sign in to comment.