From 8cfcb4c7a6e9dc3030ea3ab7c68cd074d6cbd42f Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 30 Jul 2024 17:54:27 +0200 Subject: [PATCH 001/123] Pin zenodo doi in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 63a92f26..a8a25a52 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![GitHub Actions CI Status](https://github.com/nf-core/proteinfold/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/ci.yml) -[![GitHub Actions Linting Status](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinfold/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions Linting Status](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinfold/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.13135393-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.13135393) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) From 06735054ac6329efaab1bd77c7717081955add64 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 30 Jul 2024 17:54:53 +0200 Subject: [PATCH 002/123] Update changelog for v1.2.0dev --- CHANGELOG.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
2fbddadb..a1aae7ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 +## v1.2.0dev - [date] -- Minor patch release to fix multiqc report. +### Enhancements & fixes + +## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 ### Enhancements & fixes +- Minor patch release to fix multiqc report. + ## [[1.1.0](https://github.com/nf-core/proteinfold/releases/tag/1.1.0)] - 2025-06-25 ### Credits From f2247ae064bb3dda3288ea1589881f8d356a399d Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 30 Jul 2024 17:58:45 +0200 Subject: [PATCH 003/123] bump dev version in images --- modules/local/colabfold_batch.nf | 2 +- modules/local/mmseqs_colabfoldsearch.nf | 2 +- modules/local/run_alphafold2.nf | 2 +- modules/local/run_alphafold2_msa.nf | 2 +- modules/local/run_alphafold2_pred.nf | 2 +- modules/local/run_esmfold.nf | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/colabfold_batch.nf b/modules/local/colabfold_batch.nf index 5dab51fb..5b1c5467 100644 --- a/modules/local/colabfold_batch.nf +++ b/modules/local/colabfold_batch.nf @@ -7,7 +7,7 @@ process COLABFOLD_BATCH { error("Local COLABFOLD_BATCH module does not support Conda. Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_colabfold:1.1.1" + container "nf-core/proteinfold_colabfold:dev" input: tuple val(meta), path(fasta) diff --git a/modules/local/mmseqs_colabfoldsearch.nf b/modules/local/mmseqs_colabfoldsearch.nf index c2140c5b..c6a2c9b0 100644 --- a/modules/local/mmseqs_colabfoldsearch.nf +++ b/modules/local/mmseqs_colabfoldsearch.nf @@ -7,7 +7,7 @@ process MMSEQS_COLABFOLDSEARCH { error("Local MMSEQS_COLABFOLDSEARCH module does not support Conda. 
Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_colabfold:1.1.1" + container "nf-core/proteinfold_colabfold:dev" input: tuple val(meta), path(fasta) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index cb3527d3..20cbf9fc 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -10,7 +10,7 @@ process RUN_ALPHAFOLD2 { error("Local RUN_ALPHAFOLD2 module does not support Conda. Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_alphafold2_standard:1.1.1" + container "nf-core/proteinfold_alphafold2_standard:dev" input: tuple val(meta), path(fasta) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index fdc67e88..85a40676 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -10,7 +10,7 @@ process RUN_ALPHAFOLD2_MSA { error("Local RUN_ALPHAFOLD2_MSA module does not support Conda. Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_alphafold2_msa:1.1.1" + container "nf-core/proteinfold_alphafold2_msa:dev" input: tuple val(meta), path(fasta) diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index 92b5d2a5..ee9983c5 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -10,7 +10,7 @@ process RUN_ALPHAFOLD2_PRED { error("Local RUN_ALPHAFOLD2_PRED module does not support Conda. Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_alphafold2_split:1.1.1" + container "nf-core/proteinfold_alphafold2_split:dev" input: tuple val(meta), path(fasta) diff --git a/modules/local/run_esmfold.nf b/modules/local/run_esmfold.nf index 66c5bbc7..f37c9eb3 100644 --- a/modules/local/run_esmfold.nf +++ b/modules/local/run_esmfold.nf @@ -6,7 +6,7 @@ process RUN_ESMFOLD { error("Local RUN_ESMFOLD module does not support Conda. 
Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_esmfold:1.1.1" + container "nf-core/proteinfold_esmfold:dev" input: tuple val(meta), path(fasta) From 3986454a6333e5ca8db325e6b539c6fd6a371b0c Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 30 Jul 2024 17:59:55 +0200 Subject: [PATCH 004/123] bump dev in multiqc_config --- assets/multiqc_config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 3b58e3d0..f6acb16a 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/proteinfold + This report has been generated by the nf-core/proteinfold analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-proteinfold-methods-description": order: -1000 From bfd3c76b0c585d45297bd54e6374dafb27131fa3 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 6 Aug 2024 16:05:30 +0200 Subject: [PATCH 005/123] Fix uppercase model preset --- README.md | 4 ++-- docs/usage.md | 4 ++-- workflows/colabfold.nf | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a8a25a52..522b590e 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ The pipeline takes care of downloading the databases and parameters required by --colabfold_db \ --num_recycles_colabfold 3 \ --use_amber \ - --colabfold_model_preset "AlphaFold2-ptm" \ + --colabfold_model_preset "alphaFold2-ptm" \ --use_gpu \ --db_load_mode 0 -profile @@ -115,7 +115,7 @@ The pipeline takes care of downloading the databases and parameters required by --colabfold_db \ --num_recycles_colabfold 3 \ --use_amber \ - --colabfold_model_preset "AlphaFold2-ptm" \ + --colabfold_model_preset "alphaFold2-ptm" \ --use_gpu \ -profile ``` diff --git a/docs/usage.md b/docs/usage.md index 
12e47552..7c81be35 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -236,7 +236,7 @@ nextflow run nf-core/proteinfold \ --colabfold_db \ --num_recycles_colabfold 3 \ --use_amber \ - --colabfold_model_preset "AlphaFold2-ptm" \ + --colabfold_model_preset "alphaFold2-ptm" \ --use_gpu \ --db_load_mode 0 \ -profile @@ -254,7 +254,7 @@ nextflow run nf-core/proteinfold \ --colabfold_db \ --num_recycles_colabfold 3 \ --use_amber \ - --colabfold_model_preset "AlphaFold2-ptm" \ + --colabfold_model_preset "alphaFold2-ptm" \ --use_gpu \ -profile ``` diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index 3d2829f3..9184a6ff 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -91,7 +91,7 @@ workflow COLABFOLD { // // MODULE: Run mmseqs // - if (params.colabfold_model_preset != 'AlphaFold2-ptm') { + if (params.colabfold_model_preset != 'alphaFold2-ptm') { MULTIFASTA_TO_CSV( ch_fasta ) From 4769bee1d2949ebdd854e5e6025e2139eb5b5244 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 6 Aug 2024 16:11:00 +0200 Subject: [PATCH 006/123] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1aae7ad..503a48ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes +- [[PR #175](https://github.com/nf-core/proteinfold/pull/175)] - Fix typo in some instances of model preset `alphaFold2-ptm`. 
+ ## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 ### Enhancements & fixes From 4ca5fdccb55ed61905d19898a1a3f92d8358beab Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 6 Aug 2024 17:20:27 +0200 Subject: [PATCH 007/123] Fix instances of alphafold2_ptm --- CHANGELOG.md | 2 +- README.md | 4 ++-- docs/usage.md | 4 ++-- workflows/colabfold.nf | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 503a48ec..93f2d420 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes -- [[PR #175](https://github.com/nf-core/proteinfold/pull/175)] - Fix typo in some instances of model preset `alphaFold2-ptm`. +- [[PR #175](https://github.com/nf-core/proteinfold/pull/175)] - Fix typo in some instances of model preset `alphafold2_ptm`. ## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 diff --git a/README.md b/README.md index 522b590e..dec185a9 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ The pipeline takes care of downloading the databases and parameters required by --colabfold_db \ --num_recycles_colabfold 3 \ --use_amber \ - --colabfold_model_preset "alphaFold2-ptm" \ + --colabfold_model_preset "alphafold2_ptm" \ --use_gpu \ --db_load_mode 0 -profile @@ -115,7 +115,7 @@ The pipeline takes care of downloading the databases and parameters required by --colabfold_db \ --num_recycles_colabfold 3 \ --use_amber \ - --colabfold_model_preset "alphaFold2-ptm" \ + --colabfold_model_preset "alphafold2_ptm" \ --use_gpu \ -profile ``` diff --git a/docs/usage.md b/docs/usage.md index 7c81be35..be725651 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -236,7 +236,7 @@ nextflow run nf-core/proteinfold \ --colabfold_db \ --num_recycles_colabfold 3 \ --use_amber \ - --colabfold_model_preset "alphaFold2-ptm" \ + --colabfold_model_preset "alphafold2_ptm" \ 
--use_gpu \ --db_load_mode 0 \ -profile @@ -254,7 +254,7 @@ nextflow run nf-core/proteinfold \ --colabfold_db \ --num_recycles_colabfold 3 \ --use_amber \ - --colabfold_model_preset "alphaFold2-ptm" \ + --colabfold_model_preset "alphafold2_ptm" \ --use_gpu \ -profile ``` diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index 9184a6ff..46dc8df4 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -91,7 +91,7 @@ workflow COLABFOLD { // // MODULE: Run mmseqs // - if (params.colabfold_model_preset != 'alphaFold2-ptm') { + if (params.colabfold_model_preset != 'alphafold2_ptm') { MULTIFASTA_TO_CSV( ch_fasta ) From 02babc4a2db822e95f54f5696216bb562d2dea07 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 7 Aug 2024 15:16:42 +0200 Subject: [PATCH 008/123] Fix condition to run MULTIFASTA_TO_CSV when executing colabfold server local --- workflows/colabfold.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index 46dc8df4..eafc222c 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -91,7 +91,7 @@ workflow COLABFOLD { // // MODULE: Run mmseqs // - if (params.colabfold_model_preset != 'alphafold2_ptm') { + if (params.colabfold_model_preset != 'alphafold2_ptm' && params.colabfold_model_preset != 'alphafold2') { MULTIFASTA_TO_CSV( ch_fasta ) From b520b9bd26d0a995a81760f16a74a76ed0cd69c1 Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Sat, 21 Sep 2024 13:03:10 +1000 Subject: [PATCH 009/123] accepting mutltiple models --- conf/modules_alphafold2.config | 15 +++++++++++---- conf/modules_colabfold.config | 14 ++++++++++++-- conf/modules_esmfold.config | 11 ++++++++++- main.nf | 14 ++++++++------ nextflow.config | 8 +++++--- nextflow_schema.json | 3 +-- tower.yml | 8 ++++++-- 7 files changed, 53 insertions(+), 20 deletions(-) diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config index 4aae2d30..33b04c38 100644 --- 
a/conf/modules_alphafold2.config +++ b/conf/modules_alphafold2.config @@ -17,11 +17,18 @@ process { withName: 'GUNZIP|COMBINE_UNIPROT|DOWNLOAD_PDBMMCIF|ARIA2_PDB_SEQRES' { publishDir = [ - path: {"${params.outdir}/DBs/${params.mode}/${params.alphafold2_mode}"}, + path: {"${params.outdir}/DBs/alphafold2/${params.alphafold2_mode}"}, mode: 'symlink', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } + withName: 'NFCORE_PROTEINFOLD:ALPHAFOLD2:MULTIQC' { + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : "alphafold2_$filename" } + ] + } } if (params.alphafold2_mode == 'standard') { @@ -33,7 +40,7 @@ if (params.alphafold2_mode == 'standard') { params.max_template_date ? "--max_template_date ${params.max_template_date}" : '' ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/${params.mode}/${params.alphafold2_mode}" }, + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] @@ -47,7 +54,7 @@ if (params.alphafold2_mode == 'split_msa_prediction') { withName: 'RUN_ALPHAFOLD2_MSA' { ext.args = params.max_template_date ? "--max_template_date ${params.max_template_date}" : '' publishDir = [ - path: { "${params.outdir}/${params.mode}/${params.alphafold2_mode}" }, + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -57,7 +64,7 @@ if (params.alphafold2_mode == 'split_msa_prediction') { if(params.use_gpu) { accelerator = 1 } ext.args = params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false' publishDir = [ - path: { "${params.outdir}/${params.mode}/${params.alphafold2_mode}" }, + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config index a7a719b0..2efcfa01 100644 --- a/conf/modules_colabfold.config +++ b/conf/modules_colabfold.config @@ -10,6 +10,16 @@ ---------------------------------------------------------------------------------------- */ +process { + withName: 'NFCORE_PROTEINFOLD:COLABFOLD:MULTIQC' { + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : "colabfold_$filename" } + ] + } +} + if (params.colabfold_server == 'webserver') { process { withName: 'COLABFOLD_BATCH' { @@ -20,7 +30,7 @@ if (params.colabfold_server == 'webserver') { params.host_url ? "--host-url ${params.host_url}" : '' ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/${params.mode}/${params.colabfold_server}" }, + path: { "${params.outdir}/colabfold/${params.colabfold_server}" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, pattern: '*.*' @@ -57,7 +67,7 @@ if (params.colabfold_server == 'local') { params.use_templates ? '--templates' : '' ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/${params.mode}/${params.colabfold_server}" }, + path: { "${params.outdir}/colabfold/${params.colabfold_server}" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, pattern: '*.*' diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config index 81b3048f..5b3113b8 100644 --- a/conf/modules_esmfold.config +++ b/conf/modules_esmfold.config @@ -14,10 +14,19 @@ process { withName: 'RUN_ESMFOLD' { ext.args = {params.use_gpu ? '' : '--cpu-only'} publishDir = [ - path: { "${params.outdir}/${params.mode}" }, + path: { "${params.outdir}/colabfold" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, pattern: '*.*' ] } + + withName: 'NFCORE_PROTEINFOLD:ESMFOLD:MULTIQC' { + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : "esmfold_$filename" } + ] + } + } diff --git a/main.nf b/main.nf index d6da0f09..cdc63d84 100644 --- a/main.nf +++ b/main.nf @@ -17,13 +17,15 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -if (params.mode == "alphafold2") { +if (params.mode.toLowerCase().split(",").contains("alphafold2")) { include { PREPARE_ALPHAFOLD2_DBS } from './subworkflows/local/prepare_alphafold2_dbs' include { ALPHAFOLD2 } from './workflows/alphafold2' -} else if (params.mode == "colabfold") { +} +if (params.mode.toLowerCase().split(",").contains("colabfold")) { include { PREPARE_COLABFOLD_DBS } from './subworkflows/local/prepare_colabfold_dbs' include { COLABFOLD } from './workflows/colabfold' -} else if (params.mode == "esmfold") { +} +if (params.mode.toLowerCase().split(",").contains("esmfold")) { include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs' include { ESMFOLD } from './workflows/esmfold' } @@ -60,7 +62,7 @@ workflow NFCORE_PROTEINFOLD { // // WORKFLOW: Run alphafold2 // - if(params.mode == "alphafold2") { + if(params.mode.toLowerCase().split(",").contains("alphafold2")) { // // SUBWORKFLOW: Prepare Alphafold2 DBs // @@ -118,7 +120,7 @@ workflow NFCORE_PROTEINFOLD { // // WORKFLOW: Run colabfold // - else if(params.mode == "colabfold") { + if(params.mode.toLowerCase().split(",").contains("colabfold")) { // // SUBWORKFLOW: Prepare Colabfold DBs // @@ -153,7 +155,7 @@ workflow NFCORE_PROTEINFOLD { // // WORKFLOW: Run esmfold // - else if(params.mode == "esmfold") { + if(params.mode.toLowerCase().split(",").contains("esmfold")) { // // SUBWORKFLOW: Prepare esmfold DBs // diff --git a/nextflow.config b/nextflow.config index 7a0c5c4e..3f7428dd 100644 --- 
a/nextflow.config +++ b/nextflow.config @@ -330,11 +330,13 @@ manifest { includeConfig 'conf/modules.config' // Load modules config for pipeline specific modes -if (params.mode == 'alphafold2') { +if (params.mode.toLowerCase().split(",").contains("alphafold2")) { includeConfig 'conf/modules_alphafold2.config' -} else if (params.mode == 'colabfold') { +} +if (params.mode.toLowerCase().split(",").contains("colabfold")) { includeConfig 'conf/modules_colabfold.config' -} else if (params.mode == 'esmfold') { +} +if (params.mode.toLowerCase().split(",").contains("esmfold")) { includeConfig 'conf/modules_esmfold.config' } diff --git a/nextflow_schema.json b/nextflow_schema.json index df0bbfe3..2d3ce68e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -32,8 +32,7 @@ "mode": { "type": "string", "default": "alphafold2", - "description": "Specifies the mode in which the pipeline will be run", - "enum": ["alphafold2", "colabfold", "esmfold"], + "description": "Specifies the mode in which the pipeline will be run. 
mode can be any combination of ['alphafold2', 'colabfold', 'esmfold'] separated by a comma (',') with no spaces.", "fa_icon": "fas fa-cogs" }, "use_gpu": { diff --git a/tower.yml b/tower.yml index 787aedfe..7fc70c10 100644 --- a/tower.yml +++ b/tower.yml @@ -1,5 +1,9 @@ reports: - multiqc_report.html: - display: "MultiQC HTML report" + esmfold_multiqc_report.html: + display: "ESMFOLD MultiQC HTML report" + alphafold2_multiqc_report.html: + display: "ALPHAFOLD2 MultiQC HTML report" + colabfold_multiqc_report.html: + display: "COLABFOLD MultiQC HTML report" samplesheet.csv: display: "Auto-created samplesheet with collated metadata and FASTQ paths" From 78ba2883c9120deb7925aaa4dcbf8933b3610339 Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Sat, 21 Sep 2024 13:33:05 +1000 Subject: [PATCH 010/123] accepting mutltiple models --- conf/modules_esmfold.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config index 5b3113b8..d8356924 100644 --- a/conf/modules_esmfold.config +++ b/conf/modules_esmfold.config @@ -14,7 +14,7 @@ process { withName: 'RUN_ESMFOLD' { ext.args = {params.use_gpu ? '' : '--cpu-only'} publishDir = [ - path: { "${params.outdir}/colabfold" }, + path: { "${params.outdir}/esmfold" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, pattern: '*.*' From e7b4f06079680105b573a5b23abdd89837a1cbd6 Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Sat, 21 Sep 2024 13:56:00 +1000 Subject: [PATCH 011/123] update usage --- docs/usage.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index be725651..55120275 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -37,7 +37,9 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p ## Running the pipeline -The typical commands for running the pipeline on AlphaFold2, Colabfold and ESMFold modes are shown below. 
+The typical commands for running the pipeline on AlphaFold2, Colabfold and ESMFold modes are shown below. + +> You can run any combination of the models by providing them to the `--mode` parameter separated by a comma. For example: `--mode alphafold2,esmfold,colabfold` will run the three models in parallel. AlphaFold2 regular can be run using this command: From 0bb9d9d378170b4b0c60e210d6bd8934d431e6c6 Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Sat, 21 Sep 2024 14:08:02 +1000 Subject: [PATCH 012/123] update docs - usage and output --- docs/output.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/output.md b/docs/output.md index 29d2337c..9b9a8fb8 100644 --- a/docs/output.md +++ b/docs/output.md @@ -183,9 +183,9 @@ Below you can find an indicative example of the TSV file with the pLDDT scores p Output files - `multiqc` - - multiqc_report.html: A standalone HTML file that can be viewed in your web browser. - - multiqc_data/: Directory containing parsed statistics from the different tools used in the pipeline. - - multiqc_plots/: Directory containing static images from the report in various formats. + - `_multiqc_report.html`: A standalone HTML file that can be viewed in your web browser. + - `_multiqc_data/`: Directory containing parsed statistics from the different tools used in the pipeline. + - `_multiqc_plots/`: Directory containing static images from the report in various formats. 
From 080c16f8aed9c5fdd5585a4d04d9a2203a481877 Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Sat, 21 Sep 2024 14:14:27 +1000 Subject: [PATCH 013/123] update change log --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93f2d420..9ba92e3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[PR #175](https://github.com/nf-core/proteinfold/pull/175)] - Fix typo in some instances of model preset `alphafold2_ptm`. +- [[PR #178](https://github.com/nf-core/proteinfold/pull/178)] - Enable running multiple modes in parallel. + ## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 ### Enhancements & fixes From fbe52036cc47894c57e8c0f877d2e37fe980e826 Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Sat, 21 Sep 2024 14:30:16 +1000 Subject: [PATCH 014/123] lint minor fixes --- main.nf | 4 ++-- nextflow.config | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index cdc63d84..6d762db1 100644 --- a/main.nf +++ b/main.nf @@ -20,11 +20,11 @@ nextflow.enable.dsl = 2 if (params.mode.toLowerCase().split(",").contains("alphafold2")) { include { PREPARE_ALPHAFOLD2_DBS } from './subworkflows/local/prepare_alphafold2_dbs' include { ALPHAFOLD2 } from './workflows/alphafold2' -} +} if (params.mode.toLowerCase().split(",").contains("colabfold")) { include { PREPARE_COLABFOLD_DBS } from './subworkflows/local/prepare_colabfold_dbs' include { COLABFOLD } from './workflows/colabfold' -} +} if (params.mode.toLowerCase().split(",").contains("esmfold")) { include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs' include { ESMFOLD } from './workflows/esmfold' diff --git a/nextflow.config b/nextflow.config index 3f7428dd..2bc5ca0a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -332,10 +332,10 @@ includeConfig 'conf/modules.config' // Load modules 
config for pipeline specific modes if (params.mode.toLowerCase().split(",").contains("alphafold2")) { includeConfig 'conf/modules_alphafold2.config' -} +} if (params.mode.toLowerCase().split(",").contains("colabfold")) { includeConfig 'conf/modules_colabfold.config' -} +} if (params.mode.toLowerCase().split(",").contains("esmfold")) { includeConfig 'conf/modules_esmfold.config' } From dc3bf4fe90cdebf47a09b8de703c93f1acfce78f Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Sat, 21 Sep 2024 14:44:39 +1000 Subject: [PATCH 015/123] lint fixes --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 55120275..ecf813ce 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -37,7 +37,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p ## Running the pipeline -The typical commands for running the pipeline on AlphaFold2, Colabfold and ESMFold modes are shown below. +The typical commands for running the pipeline on AlphaFold2, Colabfold and ESMFold modes are shown below. > You can run any combination of the models by providing them to the `--mode` parameter separated by a comma. For example: `--mode alphafold2,esmfold,colabfold` will run the three models in parallel. From 2ae16a3ff93fe3f4dbf3149182274eefd74e7628 Mon Sep 17 00:00:00 2001 From: Ziad Al-Bkhetan Date: Thu, 26 Sep 2024 17:34:10 +1000 Subject: [PATCH 016/123] Update CHANGELOG.md Co-authored-by: Jose Espinosa-Carrasco --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ba92e3b..85ada4e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes - [[PR #175](https://github.com/nf-core/proteinfold/pull/175)] - Fix typo in some instances of model preset `alphafold2_ptm`. 
- - [[PR #178](https://github.com/nf-core/proteinfold/pull/178)] - Enable running multiple modes in parallel. ## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 From 73a75e25e7a9f845a8c06b2c0427248ee37340ae Mon Sep 17 00:00:00 2001 From: Ziad Al-Bkhetan Date: Thu, 26 Sep 2024 17:34:18 +1000 Subject: [PATCH 017/123] Update CHANGELOG.md Co-authored-by: Jose Espinosa-Carrasco --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85ada4e4..44cd1bd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes -- [[PR #175](https://github.com/nf-core/proteinfold/pull/175)] - Fix typo in some instances of model preset `alphafold2_ptm`. +- [[#177](https://github.com/nf-core/proteinfold/issues/177)]- Fix typo in some instances of model preset `alphafold2_ptm`. - [[PR #178](https://github.com/nf-core/proteinfold/pull/178)] - Enable running multiple modes in parallel. 
## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 From 3d02fc570ab914866a3f7add869b3bb1f9a9f80c Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Tue, 1 Oct 2024 10:55:47 +1000 Subject: [PATCH 018/123] add results visulisation --- assets/NO_FILE | 0 assets/proteinfold_template.html | 908 +++++++++++++++++++++++++++ bin/extract_output.py | 28 + bin/generat_report.py | 386 ++++++++++++ main.nf | 24 +- modules/local/colabfold_batch.nf | 8 +- modules/local/generat_report.nf | 51 ++ modules/local/run_alphafold2.nf | 18 +- modules/local/run_alphafold2_msa.nf | 2 +- modules/local/run_alphafold2_pred.nf | 21 +- modules/local/run_esmfold.nf | 6 +- nextflow.config | 3 +- nextflow_schema.json | 5 + workflows/alphafold2.nf | 21 +- workflows/esmfold.nf | 7 +- 15 files changed, 1461 insertions(+), 27 deletions(-) create mode 100644 assets/NO_FILE create mode 100644 assets/proteinfold_template.html create mode 100755 bin/extract_output.py create mode 100755 bin/generat_report.py create mode 100644 modules/local/generat_report.nf diff --git a/assets/NO_FILE b/assets/NO_FILE new file mode 100644 index 00000000..e69de29b diff --git a/assets/proteinfold_template.html b/assets/proteinfold_template.html new file mode 100644 index 00000000..57cfe3d7 --- /dev/null +++ b/assets/proteinfold_template.html @@ -0,0 +1,908 @@ + + + + + + + Protein structure prediction + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+ + + + + +
+ +
+ + +
+
+
+
+
+
<50
+
70
+
90+
+
+
+
+ +
+

+ AlphaFold produces a + + per-residue confidence score (pLDDT) + + between 0 and 100. Some regions below 50 pLDDT may be unstructured in isolation. +

+
+
+ + + + + + +
+ +
+ +
+ + +
+
+
Information
+ +
+
+
Program: *prog_name*
+
ID: *sample_name*
+
+
Average:
+
+ +
+
Navigation
+ + +
+
+ Scroll up/down + to zoom in and out +
+
+ Click + drag + to rotate the structure +
+
+ CTRL + click + drag + to move the structure +
+
+ Click + an atom to bring it into focus +
+
+
+
+ + +
+
+
Toggle representations
+
+ + + + +
+
+ +
+
+
+
Actions
+
+ + + +
+
+
+
Download
+ +
+ + +
+
+
+
+
+
+
+ +
+
+
+
+
+
+
+ + +
+
+
Sequence Coverage
+
+
+ +
+ +
+
+ +
+
pLDDT
+
+ +
+
+
+
+
+ + +
+ + +
+
+
+ + +
+
+

+ The Australian BioCommons + is supported by + Bioplatforms Australia +

+

+ Bioplatforms Australia + is enabled by + NCRIS +

+
+
+
+ + + + \ No newline at end of file diff --git a/bin/extract_output.py b/bin/extract_output.py new file mode 100755 index 00000000..a43a8a3c --- /dev/null +++ b/bin/extract_output.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +import pickle +import os, sys +import argparse + +def read_pkl(id, pkl_files): + for pkl_file in pkl_files: + dict_data = pickle.load(open(pkl_file,'rb')) + #print(dict_data.keys()) + if pkl_file.endswith("features.pkl"): + with open (f"{id}_msa.tsv", "w") as out_f: + for val in dict_data['msa']: + out_f.write("\t".join([str(x) for x in val]) + "\n") + else: + model_id = os.path.basename(pkl_file).replace("result_model_", "").replace("_pred_0.pkl", "") + with open (f"{id}_lddt_{model_id}.tsv", "w") as out_f: + out_f.write("\t".join([str(x) for x in dict_data['plddt']]) + "\n") + + +parser = argparse.ArgumentParser() +parser.add_argument('--pkls',dest='pkls',required=True, nargs="+") +parser.add_argument('--name',dest='name') +parser.add_argument('--output_dir',dest='output_dir') +parser.set_defaults(output_dir='') +parser.set_defaults(name='') +args = parser.parse_args() + +read_pkl(args.name, args.pkls) diff --git a/bin/generat_report.py b/bin/generat_report.py new file mode 100755 index 00000000..dcb48911 --- /dev/null +++ b/bin/generat_report.py @@ -0,0 +1,386 @@ +#!/usr/bin/env python + +import os +from matplotlib import pyplot as plt +import argparse +from collections import OrderedDict +import base64 +import os +from collections import OrderedDict +import plotly.graph_objects as go +from plotly.subplots import make_subplots +import re +from Bio import PDB + +def generate_output_images(msa_path, plddt_data, name, out_dir, in_type, generate_tsv): + msa = [] + if not msa_path.endswith("NO_FILE"): + with open(msa_path, 'r') as in_file: + for line in in_file: + msa.append([int(x) for x in line.strip().split()]) + + seqid = [] + for sequence in msa: + matches = [1.0 if first == other else 0.0 for first, other in zip(msa[0], sequence)] + 
seqid.append(sum(matches) / len(matches)) + + seqid_sort = sorted(range(len(seqid)), key=seqid.__getitem__) + + non_gaps = [] + for sequence in msa: + non_gaps.append([float(num != 21) if num != 21 else float('nan') for num in sequence]) + + sorted_non_gaps = [non_gaps[i] for i in seqid_sort] + final = [] + for sorted_seq, identity in zip(sorted_non_gaps, [seqid[i] for i in seqid_sort]): + final.append([value * identity if not isinstance(value, str) else value for value in sorted_seq]) + + # ################################################################## + plt.figure(figsize=(14, 14), dpi=100) + # ################################################################## + plt.title("Sequence coverage", fontsize=30, pad=36) + plt.imshow(final, + interpolation='nearest', aspect='auto', + cmap="rainbow_r", vmin=0, vmax=1, origin='lower') + + column_counts = [0] * len(msa[0]) + for col in range(len(msa[0])): + for row in msa: + if row[col] != 21: + column_counts[col] += 1 + + plt.plot(column_counts, color='black') + plt.xlim(-0.5, len(msa[0]) - 0.5) + plt.ylim(-0.5, len(msa) - 0.5) + + plt.tick_params(axis='both', which='both', labelsize=18) + + cbar = plt.colorbar() + cbar.set_label("Sequence identity to query", fontsize=24, labelpad=24) + cbar.ax.tick_params(labelsize=18) + plt.xlabel("Positions", fontsize=24, labelpad=24) + plt.ylabel("Sequences", fontsize=24, labelpad=36) + plt.savefig(f"{out_dir}/{name+('_' if name else '')}seq_coverage.png") + + # ################################################################## + + plddt_per_model = OrderedDict() + output_data = plddt_data + + if generate_tsv == "y": + for plddt_path in output_data: + with open(plddt_path, 'r') as in_file: + plddt_per_model[os.path.basename(plddt_path)[:-4]] = [float(x) for x in in_file.read().strip().split()] + else: + for i, plddt_values_str in enumerate(output_data): + plddt_per_model[i] = [] + plddt_per_model[i] = [float(x) for x in plddt_values_str.strip().split()] + + # 
plt.figure(figsize=(14, 14), dpi=100) + # plt.title("Predicted LDDT per position") + # for model_name, value_plddt in plddt_per_model.items(): + # plt.plot(value_plddt, label=model_name) + # plt.ylim(0, 100) + # plt.ylabel("Predicted LDDT") + # plt.xlabel("Positions") + # plt.savefig(f"{out_dir}/{name+('_' if name else '')}coverage_LDDT.png") + + # # split into figures + # i = 0 + # for model_name, value_plddt in plddt_per_model.items(): + # plt.figure(figsize=(14, 14), dpi=100) + # plt.title("Predicted LDDT per position") + # plt.plot(value_plddt, label=model_name) + # plt.ylim(0, 100) + # plt.ylabel("Predicted LDDT") + # plt.xlabel("Positions") + # plt.savefig(f"{out_dir}/{name+('_' if name else '')}coverage_LDDT_{i}.png") + # i += 1 + + fig = go.Figure() + for idx, (model_name, value_plddt) in enumerate(plddt_per_model.items()): + rank_label = f"Ranked {idx}" + fig.add_trace(go.Scatter( + x=list(range(len(value_plddt))), + y=value_plddt, + mode='lines', + name=rank_label, + text=[f"({i}, {value:.2f})" for i, value in enumerate(value_plddt)], + hoverinfo='text' + )) + fig.update_layout( + title=dict( + text='Predicted LDDT per position', + x=0.5, + xanchor='center' + ), + xaxis=dict( + title='Positions', + showline=True, + linecolor='black', + gridcolor='WhiteSmoke' + ), + yaxis=dict( + title='Predicted LDDT', + range=[0, 100], + minallowed=0, + maxallowed=100, + showline=True, + linecolor='black', + gridcolor='WhiteSmoke' + ), + legend=dict( + yanchor="bottom", + y=0, + xanchor="right", + x=1.3 + ), + plot_bgcolor='white', + width=600, + height=600, + modebar_remove=['toImage', 'zoomIn', 'zoomOut'] + ) + html_content = fig.to_html(full_html=False, include_plotlyjs='cdn', config={'displayModeBar': True, 'displaylogo': False, 'scrollZoom': True}) + + with open(f"{out_dir}/{name+('_' if name else '')}coverage_LDDT.html", "w") as out_file: + out_file.write(html_content) + + + ################################################################## + + + 
################################################################## + """ + num_models = 5 # columns + num_runs_per_model = math.ceil(len(model_names)/num_models) + fig = plt.figure(figsize=(3 * num_models, 2 * num_runs_per_model), dpi=100) + for n, (model_name, value) in enumerate(pae_plddt_per_model.items()): + plt.subplot(num_runs_per_model, num_models, n + 1) + plt.title(model_name) + plt.imshow(value["pae"], label=model_name, cmap="bwr", vmin=0, vmax=30) + plt.colorbar() + fig.tight_layout() + plt.savefig(f"{out_dir}/{name+('_' if name else '')}PAE.png") + """ + ################################################################## + +def generate_plots(msa_path, plddt_paths, name, out_dir): + msa = [] + with open(msa_path, 'r') as in_file: + for line in in_file: + msa.append([int(x) for x in line.strip().split()]) + + seqid = [] + for sequence in msa: + matches = [1.0 if first == other else 0.0 for first, other in zip(msa[0], sequence)] + seqid.append(sum(matches) / len(matches)) + + seqid_sort = sorted(range(len(seqid)), key=seqid.__getitem__) + + non_gaps = [] + for sequence in msa: + non_gaps.append([float(num != 21) if num != 21 else float('nan') for num in sequence]) + + sorted_non_gaps = [non_gaps[i] for i in seqid_sort] + final = [] + for sorted_seq, identity in zip(sorted_non_gaps, [seqid[i] for i in seqid_sort]): + final.append([value * identity if not isinstance(value, str) else value for value in sorted_seq]) + + # Plotting Sequence Coverage using Plotly + fig = go.Figure() + fig.add_trace(go.Heatmap( + z=final, + colorscale="Rainbow", + zmin=0, + zmax=1, + )) + fig.update_layout( + title="Sequence coverage", + xaxis_title="Positions", + yaxis_title="Sequences" + ) + # Save as interactive HTML instead of an image + fig.savefig(f"{out_dir}/{name+('_' if name else '')}seq_coverage.png") + """ + #fig.to_html(full_html=False).write_html(f"{out_dir}/{name+('_' if name else '')}seq_coverage.html") + with open (f"{out_dir}/{name+('_' if name else 
'')}seq_coverage.html", "w") as out_plt: + out_plt.write(fig.to_html(full_html=False)) + """ + # Plotting Predicted LDDT per position using Plotly + plddt_per_model = OrderedDict() + plddt_paths.sort() + for plddt_path in plddt_paths: + with open(plddt_path, 'r') as in_file: + plddt_per_model[os.path.basename(plddt_path)[:-4]] = [float(x) for x in in_file.read().strip().split()] + + i = 0 + for model_name, value_plddt in plddt_per_model.items(): + fig = go.Figure() + fig.add_trace(go.Scatter( + x=list(range(len(value_plddt))), + y=value_plddt, + mode='lines', + name=model_name + )) + fig.update_layout(title="Predicted LDDT per Position") + fig.savefig(f"{out_dir}/{name+('_' if name else '')}coverage_LDDT_{i}.png") + """ + with open (f"{out_dir}/{name+('_' if name else '')}coverage_LDDT_{i}.html", "w") as out_plt: + out_plt.write(fig.to_html(full_html=False).replace("\"", "\\\"")) + """ + i += 1 + +def align_structures(structures): + parser = PDB.PDBParser(QUIET=True) + structures = [parser.get_structure(f'Structure_{i}', pdb) for i, pdb in enumerate(structures)] + + ref_structure = structures[0] + ref_atoms = [atom for atom in ref_structure.get_atoms()] + + super_imposer = PDB.Superimposer() + aligned_structures = [structures[0]] # Include the reference structure in the list + + for i, structure in enumerate(structures[1:], start=1): + target_atoms = [atom for atom in structure.get_atoms()] + + super_imposer.set_atoms(ref_atoms, target_atoms) + super_imposer.apply(structure.get_atoms()) + + aligned_structure = f'aligned_structure_{i}.pdb' + io = PDB.PDBIO() + io.set_structure(structure) + io.save(aligned_structure) + aligned_structures.append(aligned_structure) + + return aligned_structures + + +def pdb_to_lddt(pdb_files, generate_tsv): + pdb_files_sorted = pdb_files + pdb_files_sorted.sort() + + output_lddt = [] + averages = [] + + for pdb_file in pdb_files_sorted: + plddt_values = [] + seen_lines = set() + + with open(pdb_file, 'r') as infile: + for line in 
infile: + columns = line.split() + if len(columns) >= 11: + key = f"{columns[5]}\t{columns[10]}" + if key not in seen_lines: + seen_lines.add(key) + plddt_values.append(float(columns[10])) + + # Calculate the average PLDDT value for the current file + if plddt_values: + avg_plddt = sum(plddt_values) / len(plddt_values) + averages.append(avg_plddt) + else: + averages.append(0.0) + + if generate_tsv == "y": + output_file = f"{pdb_file.replace('.pdb', '')}_plddt.tsv" + with open(output_file, 'w') as outfile: + outfile.write(" ".join(map(str, plddt_values)) + "\n") + output_lddt.append(output_file) + else: + plddt_values_string = " ".join(map(str, plddt_values)) + output_lddt.append(plddt_values_string) + + return output_lddt, averages + +print("Starting...") + +parser = argparse.ArgumentParser() +parser.add_argument('--type', dest='in_type') +parser.add_argument('--generate_tsv', choices=['y', 'n'], default = 'n', dest='generate_tsv') +parser.add_argument('--msa', dest='msa', default='NO_FILE') +parser.add_argument('--pdb', dest='pdb',required=True, nargs="+") +parser.add_argument('--name', dest='name') +parser.add_argument('--output_dir',dest='output_dir') +parser.add_argument('--html_template',dest='html_template') +parser.set_defaults(output_dir='') +parser.set_defaults(in_type='ESM-FOLD') +parser.set_defaults(name='') +args = parser.parse_args() + +lddt_data, lddt_averages = pdb_to_lddt(args.pdb, args.generate_tsv) + +generate_output_images(args.msa, lddt_data, args.name, args.output_dir, args.in_type, args.generate_tsv) +#generate_plots(args.msa, args.plddt, args.name, args.output_dir) + +print("generating html report...") +structures = args.pdb +structures.sort() +aligned_structures = align_structures(structures) + +io = PDB.PDBIO() +ref_structure_path = 'aligned_structure_0.pdb' +io.set_structure(aligned_structures[0]) +io.save(ref_structure_path) +aligned_structures[0] = ref_structure_path + +alphafold_template = open(args.html_template, "r").read() 
+alphafold_template = alphafold_template.replace(f"*sample_name*", args.name) +alphafold_template = alphafold_template.replace(f"*prog_name*", args.in_type) + +args_pdb_array_js = ",\n".join([f'"{model}"' for model in structures]) +alphafold_template = re.sub( + r'const MODELS = \[.*?\];', # Match the existing MODELS array in HTML template + f'const MODELS = [\n {args_pdb_array_js}\n];', # Replace with the new array + alphafold_template, + flags=re.DOTALL, +) + +averages_js_array = f"const LDDT_AVERAGES = {lddt_averages};" +alphafold_template = alphafold_template.replace("const LDDT_AVERAGES = [];", averages_js_array) + +i = 0 +for structure in aligned_structures: + alphafold_template = alphafold_template.replace(f"*_data_ranked_{i}.pdb*", open(structure, "r").read().replace("\n", "\\n")) + i += 1 + +if True: + if not args.msa.endswith("NO_FILE"): + with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}seq_coverage.png", "rb") as in_file: + alphafold_template = alphafold_template.replace("seq_coverage.png", f"data:image/png;base64,{base64.b64encode(in_file.read()).decode('utf-8')}") + + # with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}seq_coverage.html", "r") as in_file: + # seq_cov_html = in_file.read() + # alphafold_template = alphafold_template.replace("
", seq_cov_html) + + else: + pattern = r'
.*?(.*?)*?
\s*' + alphafold_template = re.sub(pattern, '', alphafold_template, flags=re.DOTALL) + + # alphafold_template = alphafold_template.replace("seq_coverage.png","") + + # for i in range(0, len(args.plddt)): + # with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT_{i}.png", "rb") as in_file: + # alphafold_template = alphafold_template.replace(f"coverage_LDDT_{i}.png", f"data:image/png;base64,{base64.b64encode(in_file.read()).decode('utf-8')}") + + # for i in range(0, len(args.plddt)): + # with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT_{i}.html", "r") as in_file: + # lddt_html = in_file.read() + # alphafold_template = alphafold_template.replace("
", lddt_html) + + with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT.html", "r") as in_file: + lddt_html = in_file.read() + alphafold_template = alphafold_template.replace("
", lddt_html) + +""" +with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}seq_coverage.html", "r") as in_file: + alphafold_template = alphafold_template.replace(f"seq_coverage.png", f"{in_file.read()}") + +for i in range(0, 5): + with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT_{i}.html", "r") as in_file: + alphafold_template = alphafold_template.replace(f"coverage_LDDT_{i}.png", f"{in_file.read()}") + +""" + +with open(f"{args.output_dir}/{args.name}_{args.in_type}_report.html", "w") as out_file: + out_file.write(alphafold_template) diff --git a/main.nf b/main.nf index 6d762db1..e41745fa 100644 --- a/main.nf +++ b/main.nf @@ -35,6 +35,7 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nf include { getColabfoldAlphafold2Params } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' include { getColabfoldAlphafold2ParamsPath } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { GENERATE_REPORT } from './modules/local/generat_report' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COLABFOLD PARAMETER VALUES @@ -58,7 +59,7 @@ workflow NFCORE_PROTEINFOLD { main: ch_multiqc = Channel.empty() ch_versions = Channel.empty() - + ch_report_input = Channel.empty() // // WORKFLOW: Run alphafold2 // @@ -115,6 +116,10 @@ workflow NFCORE_PROTEINFOLD { ) ch_multiqc = ALPHAFOLD2.out.multiqc_report ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) + ch_report_input = ch_report_input.mix( + ALPHAFOLD2.out.pdb.join(ALPHAFOLD2.out.msa).map{it[0]["model"] = "ALPHAFOLD2"; it} + ) + } // @@ -178,7 +183,24 @@ workflow NFCORE_PROTEINFOLD { ) ch_multiqc = ESMFOLD.out.multiqc_report ch_versions = ch_versions.mix(ESMFOLD.out.versions) + ch_report_input = ch_report_input.mix( + ESMFOLD.out.pdb.combine(Channel.fromPath("$projectDir/assets/NO_FILE")).map{it[0]["model"] = "ESMFOLD"; it} + ) } + + // + // POST PROCESSING: generate visulaisation 
reports + // + if (!params.skip_visualisation){ + GENERATE_REPORT( + ch_report_input.map{[it[0], it[1]]}, + ch_report_input.map{[it[0], it[2]]}, + ch_report_input.map{it[0].model}, + Channel.fromPath("$projectDir/assets/proteinfold_template.html").first() + ) + } + + emit: multiqc_report = ch_multiqc // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [version1, version2, ...] diff --git a/modules/local/colabfold_batch.nf b/modules/local/colabfold_batch.nf index 5b1c5467..691e4bd8 100644 --- a/modules/local/colabfold_batch.nf +++ b/modules/local/colabfold_batch.nf @@ -18,9 +18,11 @@ process COLABFOLD_BATCH { val numRec output: - path ("*") , emit: pdb - path ("*_mqc.png") , emit: multiqc - path "versions.yml", emit: versions + tuple val(meta), path ("*_relaxed_rank_*.pdb") , emit: pdb + tuple val(meta), path ("*_coverage.png") , emit: msa + tuple val(meta), path ("*_scores_rank.json") , emit: scores + path ("*_mqc.png") , emit: multiqc + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/generat_report.nf b/modules/local/generat_report.nf new file mode 100644 index 00000000..77ded432 --- /dev/null +++ b/modules/local/generat_report.nf @@ -0,0 +1,51 @@ +process GENERATE_REPORT { + tag "$meta.id-$meta.model" + label 'process_single' + + conda "bioconda::multiqc:1.21" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'oras://community.wave.seqera.io/library/pip_biopython_matplotlib_plotly:e865101a15ad0014' : + 'community.wave.seqera.io/library/pip_biopython_matplotlib_plotly:4d51afeb4bb75495' }" + + input: + tuple val(meta), path(pdb) + tuple val(meta_msa), path(msa) + val(output_type) + path(template) + + output: + tuple val(meta), path ("*report.html"), emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + generat_report.py --type ${output_type} \\ + --msa ${msa} \\ + --pdb ${pdb.join(' ')} \\ + --html_template ${template} \\ + --output_dir ./ \\ + --name ${meta.id} \\ + $args \\ + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch test_alphafold2_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 20cbf9fc..11fe9eed 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -29,7 +29,9 @@ process RUN_ALPHAFOLD2 { output: path ("${fasta.baseName}*") - path "*_mqc.tsv", emit: multiqc + tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: pdb + tuple val(meta), path ("${fasta.baseName}/*_msa.tsv"), emit: msa + tuple val(meta), path ("*_mqc.tsv"), emit: multiqc path "versions.yml", emit: versions when: @@ -72,6 +74,9 @@ process RUN_ALPHAFOLD2 { paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv + + extract_output.py --name ${fasta.baseName} \\ + --pkls features.pkl cd .. 
cat <<-END_VERSIONS > versions.yml @@ -84,10 +89,17 @@ process RUN_ALPHAFOLD2 { """ touch ./"${fasta.baseName}".alphafold.pdb touch ./"${fasta.baseName}"_mqc.tsv - + mkdir "${fasta.baseName}" + touch "${fasta.baseName}/ranked_0.pdb" + touch "${fasta.baseName}/ranked_1.pdb" + touch "${fasta.baseName}/ranked_2.pdb" + touch "${fasta.baseName}/ranked_3.pdb" + touch "${fasta.baseName}/ranked_4.pdb" + touch "${fasta.baseName}/${fasta.baseName}_msa.tsv + cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + python: \$(python3 --version | sed 's/Python //g') END_VERSIONS """ } diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 85a40676..a4f00676 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -29,7 +29,7 @@ process RUN_ALPHAFOLD2_MSA { output: path ("${fasta.baseName}*") - path ("${fasta.baseName}.features.pkl"), emit: features + tuple val(meta), path ("${fasta.baseName}.features.pkl"), emit: features path "versions.yml" , emit: versions when: diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index ee9983c5..4f5ac62c 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -26,11 +26,13 @@ process RUN_ALPHAFOLD2_PRED { path ('uniref90/*') path ('pdb_seqres/*') path ('uniprot/*') - path msa + tuple val(meta), path(msa) output: path ("${fasta.baseName}*") - path "*_mqc.tsv", emit: multiqc + tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: pdb + tuple val(meta), path ("*_msa.tsv"), emit: msa + tuple val(meta), path ("*_mqc.tsv"), emit: multiqc path "versions.yml", emit: versions when: @@ -58,8 +60,10 @@ process RUN_ALPHAFOLD2_PRED { paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv 
cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv + cd .. - + extract_output.py --name ${fasta.baseName} \\ + --pkls ${msa} cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') @@ -70,10 +74,17 @@ process RUN_ALPHAFOLD2_PRED { """ touch ./"${fasta.baseName}".alphafold.pdb touch ./"${fasta.baseName}"_mqc.tsv - + mkdir "${fasta.baseName}" + touch "${fasta.baseName}/ranked_0.pdb" + touch "${fasta.baseName}/ranked_1.pdb" + touch "${fasta.baseName}/ranked_2.pdb" + touch "${fasta.baseName}/ranked_3.pdb" + touch "${fasta.baseName}/ranked_4.pdb" + touch ${fasta.baseName}_msa.tsv + cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + python: \$(python3 --version | sed 's/Python //g') END_VERSIONS """ } diff --git a/modules/local/run_esmfold.nf b/modules/local/run_esmfold.nf index f37c9eb3..bc1ee611 100644 --- a/modules/local/run_esmfold.nf +++ b/modules/local/run_esmfold.nf @@ -14,8 +14,8 @@ process RUN_ESMFOLD { val numRec output: - path ("${fasta.baseName}*.pdb"), emit: pdb - path ("${fasta.baseName}_plddt_mqc.tsv"), emit: multiqc + tuple val(meta), path ("${fasta.baseName}*.pdb"), emit: pdb + tuple val(meta), path ("${fasta.baseName}_plddt_mqc.tsv"), emit: multiqc path "versions.yml", emit: versions when: @@ -36,7 +36,7 @@ process RUN_ESMFOLD { awk '{print \$2"\\t"\$3"\\t"\$4"\\t"\$6"\\t"\$11}' "${fasta.baseName}"*.pdb | grep -v 'N/A' | uniq > plddt.tsv echo -e Atom_serial_number"\\t"Atom_name"\\t"Residue_name"\\t"Residue_sequence_number"\\t"pLDDT > header.tsv cat header.tsv plddt.tsv > "${fasta.baseName}"_plddt_mqc.tsv - + #mv "${fasta.baseName}"*.pdb ${fasta.baseName}.pdb cat <<-END_VERSIONS > versions.yml "${task.process}": esm-fold: $VERSION diff --git a/nextflow.config b/nextflow.config index 2bc5ca0a..ac81ccf0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -81,7 +81,8 @@ params { // Process skipping options skip_multiqc = 
false - + skip_visualisation = false + // MultiQC options multiqc_config = null multiqc_title = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 2d3ce68e..dc83e19a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -203,6 +203,11 @@ "type": "boolean", "description": "Skip MultiQC.", "fa_icon": "fas fa-fast-forward" + }, + "skip_visualisation": { + "type": "boolean", + "description": "Skip Visualisation reports.", + "fa_icon": "fas fa-fast-forward" } } }, diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 9a1aebae..6a470649 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -57,6 +57,8 @@ workflow ALPHAFOLD2 { main: ch_multiqc_files = Channel.empty() + ch_pdb = Channel.empty() + ch_msa = Channel.empty() // // Create input channel from input file provided through params.input @@ -94,7 +96,9 @@ workflow ALPHAFOLD2 { ch_pdb_seqres, ch_uniprot ) - ch_multiqc_rep = RUN_ALPHAFOLD2.out.multiqc.collect() + ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2.out.pdb) + ch_msa = ch_pdb.mix(RUN_ALPHAFOLD2.out.msa) + ch_multiqc_rep = RUN_ALPHAFOLD2.out.multiqc.map{it[1]}.collect() ch_versions = ch_versions.mix(RUN_ALPHAFOLD2.out.versions) } else if (alphafold2_mode == 'split_msa_prediction') { @@ -134,7 +138,9 @@ workflow ALPHAFOLD2 { ch_uniprot, RUN_ALPHAFOLD2_MSA.out.features ) - ch_multiqc_rep = RUN_ALPHAFOLD2_PRED.out.multiqc.collect() + ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2_PRED.out.pdb) + ch_msa = ch_pdb.mix(RUN_ALPHAFOLD2_PRED.out.msa) + ch_multiqc_rep = RUN_ALPHAFOLD2_PRED.out.multiqc.map{it[1]}.collect() ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) } @@ -144,7 +150,6 @@ workflow ALPHAFOLD2 { softwareVersionsToYAML(ch_versions) .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) .set { ch_collated_versions } - // // MODULE: MultiQC // @@ -166,15 +171,17 @@ workflow ALPHAFOLD2 { ch_multiqc_files = 
ch_multiqc_files.mix(ch_multiqc_rep) MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() + ch_multiqc_files.collect(sort: true), + ch_multiqc_config.toSortedList(), + ch_multiqc_custom_config.toSortedList(), + ch_multiqc_logo.toSortedList() ) ch_multiqc_report = MULTIQC.out.report.toList() } emit: + pdb = ch_pdb // channel: /path/to/*.pdb + msa = ch_msa // channel: /path/to/*msa.tsv multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/esmfold.nf b/workflows/esmfold.nf index 962c01a1..18b2a7f9 100644 --- a/workflows/esmfold.nf +++ b/workflows/esmfold.nf @@ -101,7 +101,7 @@ workflow ESMFOLD { ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(RUN_ESMFOLD.out.multiqc.collect()) + ch_multiqc_files = ch_multiqc_files.mix(RUN_ESMFOLD.out.multiqc.map{it[1]}.collect()) MULTIQC ( ch_multiqc_files.collect(), @@ -113,8 +113,9 @@ workflow ESMFOLD { } emit: - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] + pdb = RUN_ESMFOLD.out.pdb // channel: /path/to/*pdb + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /* From 586798c202a0d3d7c4a45e834fa51d113ec73db1 Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Tue, 1 Oct 2024 13:22:28 +1000 Subject: [PATCH 019/123] configure the reports --- bin/generat_report.py | 20 +++++++++++--------- main.nf | 5 ++++- modules/local/colabfold_batch.nf | 10 +++++++--- modules/local/generat_report.nf | 2 ++ 
nextflow_schema.json | 2 +- tower.yml | 6 ++++++ workflows/alphafold2.nf | 8 ++++---- workflows/colabfold.nf | 4 +++- 8 files changed, 38 insertions(+), 19 deletions(-) diff --git a/bin/generat_report.py b/bin/generat_report.py index dcb48911..ab8e9df0 100755 --- a/bin/generat_report.py +++ b/bin/generat_report.py @@ -1,8 +1,8 @@ #!/usr/bin/env python import os -from matplotlib import pyplot as plt import argparse +from matplotlib import pyplot as plt from collections import OrderedDict import base64 import os @@ -293,8 +293,11 @@ def pdb_to_lddt(pdb_files, generate_tsv): return output_lddt, averages + + print("Starting...") +version = '1.0.0' parser = argparse.ArgumentParser() parser.add_argument('--type', dest='in_type') parser.add_argument('--generate_tsv', choices=['y', 'n'], default = 'n', dest='generate_tsv') @@ -303,6 +306,7 @@ def pdb_to_lddt(pdb_files, generate_tsv): parser.add_argument('--name', dest='name') parser.add_argument('--output_dir',dest='output_dir') parser.add_argument('--html_template',dest='html_template') +parser.add_argument('--version', action='version', version=f'{version}') parser.set_defaults(output_dir='') parser.set_defaults(in_type='ESM-FOLD') parser.set_defaults(name='') @@ -372,15 +376,13 @@ def pdb_to_lddt(pdb_files, generate_tsv): lddt_html = in_file.read() alphafold_template = alphafold_template.replace("
", lddt_html) -""" -with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}seq_coverage.html", "r") as in_file: - alphafold_template = alphafold_template.replace(f"seq_coverage.png", f"{in_file.read()}") - -for i in range(0, 5): - with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT_{i}.html", "r") as in_file: - alphafold_template = alphafold_template.replace(f"coverage_LDDT_{i}.png", f"{in_file.read()}") +if False: + with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}seq_coverage.html", "r") as in_file: + alphafold_template = alphafold_template.replace(f"seq_coverage.png", f"{in_file.read()}") -""" + for i in range(0, 5): + with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT_{i}.html", "r") as in_file: + alphafold_template = alphafold_template.replace(f"coverage_LDDT_{i}.png", f"{in_file.read()}") with open(f"{args.output_dir}/{args.name}_{args.in_type}_report.html", "w") as out_file: out_file.write(alphafold_template) diff --git a/main.nf b/main.nf index e41745fa..53247245 100644 --- a/main.nf +++ b/main.nf @@ -155,6 +155,9 @@ workflow NFCORE_PROTEINFOLD { ) ch_multiqc = COLABFOLD.out.multiqc_report ch_versions = ch_versions.mix(COLABFOLD.out.versions) + ch_report_input = ch_report_input.mix( + COLABFOLD.out.pdb.join(COLABFOLD.out.msa).map{it[0]["model"] = "COLABFOLD"; it} + ) } // @@ -198,9 +201,9 @@ workflow NFCORE_PROTEINFOLD { ch_report_input.map{it[0].model}, Channel.fromPath("$projectDir/assets/proteinfold_template.html").first() ) + ch_versions = ch_versions.mix(GENERATE_REPORT.out.versions) } - emit: multiqc_report = ch_multiqc // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [version1, version2, ...] 
diff --git a/modules/local/colabfold_batch.nf b/modules/local/colabfold_batch.nf index 691e4bd8..7e304eaa 100644 --- a/modules/local/colabfold_batch.nf +++ b/modules/local/colabfold_batch.nf @@ -20,9 +20,8 @@ process COLABFOLD_BATCH { output: tuple val(meta), path ("*_relaxed_rank_*.pdb") , emit: pdb tuple val(meta), path ("*_coverage.png") , emit: msa - tuple val(meta), path ("*_scores_rank.json") , emit: scores - path ("*_mqc.png") , emit: multiqc - path "versions.yml" , emit: versions + tuple val(meta), path ("*_mqc.png") , emit: multiqc + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -54,6 +53,11 @@ process COLABFOLD_BATCH { """ touch ./"${fasta.baseName}"_colabfold.pdb touch ./"${fasta.baseName}"_mqc.png + touch ./${fasta.baseName}_relaxed_rank_01.pdb + touch ./${fasta.baseName}_relaxed_rank_02.pdb + touch ./${fasta.baseName}_relaxed_rank_03.pdb + touch ./${fasta.baseName}_coverage.png + touch ./${fasta.baseName}_scores_rank.json cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/generat_report.nf b/modules/local/generat_report.nf index 77ded432..5733baa1 100644 --- a/modules/local/generat_report.nf +++ b/modules/local/generat_report.nf @@ -36,6 +36,7 @@ process GENERATE_REPORT { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') + generate_report.py: \$(python3 --version) END_VERSIONS """ @@ -46,6 +47,7 @@ process GENERATE_REPORT { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') + generate_report.py: \$(python3 --version) END_VERSIONS """ } diff --git a/nextflow_schema.json b/nextflow_schema.json index dc83e19a..9baab54b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -206,7 +206,7 @@ }, "skip_visualisation": { "type": "boolean", - "description": "Skip Visualisation reports.", + "description": "Skip visualisation reports.", "fa_icon": "fas fa-fast-forward" } } diff 
--git a/tower.yml b/tower.yml index 7fc70c10..47e1860d 100644 --- a/tower.yml +++ b/tower.yml @@ -7,3 +7,9 @@ reports: display: "COLABFOLD MultiQC HTML report" samplesheet.csv: display: "Auto-created samplesheet with collated metadata and FASTQ paths" + "*_ALPHAFOLD2_report.html": + display: "ALPHAFOLD2 - Predected structures" + "*_ESMFOLD_report.html": + display: "ESMFOLD - Predected structures" + "*_COLABFOLD_report.html": + display: "COLABFOLD - Predected structures" \ No newline at end of file diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 6a470649..97882481 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -171,10 +171,10 @@ workflow ALPHAFOLD2 { ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) MULTIQC ( - ch_multiqc_files.collect(sort: true), - ch_multiqc_config.toSortedList(), - ch_multiqc_custom_config.toSortedList(), - ch_multiqc_logo.toSortedList() + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() ) ch_multiqc_report = MULTIQC.out.report.toList() } diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index eafc222c..c130f923 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -152,7 +152,7 @@ workflow COLABFOLD { ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(COLABFOLD_BATCH.out.multiqc.collect()) + ch_multiqc_files = ch_multiqc_files.mix(COLABFOLD_BATCH.out.multiqc.map{it[1]}.collect()) MULTIQC ( ch_multiqc_files.collect(), @@ -164,6 +164,8 @@ workflow COLABFOLD { } emit: + pdb = COLABFOLD_BATCH.out.pdb // channel: /path/to/*.pdb + msa = COLABFOLD_BATCH.out.msa // channel: /path/to/*_coverage.png multiqc_report = ch_multiqc_report // 
channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } From 6985dcd7e35a86d1f9af328bd16a9e8bd816fd35 Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Tue, 1 Oct 2024 14:14:53 +1000 Subject: [PATCH 020/123] rename esmfold output pdb --- bin/generat_report.py | 2 +- modules/local/run_esmfold.nf | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/generat_report.py b/bin/generat_report.py index ab8e9df0..9a353846 100755 --- a/bin/generat_report.py +++ b/bin/generat_report.py @@ -308,7 +308,7 @@ def pdb_to_lddt(pdb_files, generate_tsv): parser.add_argument('--html_template',dest='html_template') parser.add_argument('--version', action='version', version=f'{version}') parser.set_defaults(output_dir='') -parser.set_defaults(in_type='ESM-FOLD') +parser.set_defaults(in_type='ESMFOLD') parser.set_defaults(name='') args = parser.parse_args() diff --git a/modules/local/run_esmfold.nf b/modules/local/run_esmfold.nf index bc1ee611..633d3fab 100644 --- a/modules/local/run_esmfold.nf +++ b/modules/local/run_esmfold.nf @@ -36,7 +36,9 @@ process RUN_ESMFOLD { awk '{print \$2"\\t"\$3"\\t"\$4"\\t"\$6"\\t"\$11}' "${fasta.baseName}"*.pdb | grep -v 'N/A' | uniq > plddt.tsv echo -e Atom_serial_number"\\t"Atom_name"\\t"Residue_name"\\t"Residue_sequence_number"\\t"pLDDT > header.tsv cat header.tsv plddt.tsv > "${fasta.baseName}"_plddt_mqc.tsv - #mv "${fasta.baseName}"*.pdb ${fasta.baseName}.pdb + mv "${fasta.baseName}"*.pdb tmp.pdb + mv tmp.pdb ${fasta.baseName}.pdb + cat <<-END_VERSIONS > versions.yml "${task.process}": esm-fold: $VERSION From 81e877ec6c16f34ded0c772f8667274c7408563b Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Wed, 2 Oct 2024 11:13:25 +1000 Subject: [PATCH 021/123] average, tooltip & msa png --- assets/proteinfold_template.html | 63 ++++++++++++++++++++++++-------- bin/generat_report.py | 9 +++-- 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/assets/proteinfold_template.html 
b/assets/proteinfold_template.html index 57cfe3d7..64820aaa 100644 --- a/assets/proteinfold_template.html +++ b/assets/proteinfold_template.html @@ -14,6 +14,8 @@ crossorigin="anonymous" > + + - - - - - - - - - - - - - - -
- -
- -
- - - - - -
- -
- - -
-
-
-
-
-
<50
-
70
-
90+
-
-
-
- -
-

- Alphafold produces a - - per-residue confidence score (pLDDT) - - between 0 and 100. Some regions below 50 pLDDT may be unstructured in isolation. -

-
-
- - - - - - -
- +
- - -
-
-
Information
- -
-
-
Program: *prog_name*
-
ID: *sample_name*
+ class="h-auto w-40 min-w-32" + src="" + /> +
+
Protein structure prediction
+
+ -
Average:
-
- -
-
Navigation
- - -
-
- Scroll up/down - to zoom in and out -
-
- Click + drag - to rotate the structure -
-
- CTRL + click + drag - to move the structure -
-
- Click - an atom to bring it into focus -
+
+ + + + + + + +
+ +
+ +
+ + -
-
- - -
-
-
Toggle representations
-
- - - - + +
+
-
- -
-
-
-
Actions
-
- + + +
+
-
-
+ +
+ +
+
+
Information
+ +
+
+
Program: *prog_name*
+
ID: *sample_name*
+
+
Average:
+
+
+
Navigation
+ +
+
+ Scroll up/down + to zoom in and out +
+
+ Click + drag + to rotate the structure +
+
+ CTRL + click + drag + to move the structure +
+
+ Click + an atom to bring it into focus +
+
+
+
+ +
+
+
Toggle representations
+
+ + + + +
+
+
+
+
+
Actions
+
+ + +
+
+
+
Download
+
+ + +
+
+
+
+
+
-
- -
-
-
-
-
-
-
- - -
-
-
Sequence Coverage
-
-
- -
- -
+
+
+
+
+
+
- -
-
pLDDT
-
- -
-
-
-
+ +
+
+
Sequence Coverage
+
+
+ +
+ +
+
+
+
pLDDT
+
+ +
+
+
+
+
+
- - -
- - -
-
-
- - - -
-
-

- The Australian BioCommons - is supported by - Bioplatforms Australia -

-

- Bioplatforms Australia - is enabled by - NCRIS -

-
+ +
+
+
+ + + +
+
+

+ The Australian BioCommons + is supported by + Bioplatforms Australia +

+

+ Bioplatforms Australia + is enabled by + NCRIS +

+
+
-
- - - + } + }); + }; + + const updateButtons = () => { + MODELS.forEach((name, i) => { + const id = `#btn-${name.replace(".pdb", "")}`; + const btn = document.querySelector(id); + if (!btn) return; + i == state.model ? btn.classList.add("selected") : btn.classList.remove("selected"); + }); + + REPRESENTATIONS.forEach((name) => { + const id = `#btn-${name}`.replace("+", "-"); + const btn = document.querySelector(id); + if (!btn) return; + if (name in state.representations) { + btn.classList.add("selected"); + } else { + btn.classList.remove("selected"); + } + }); + + // Show "Nothing to display" if no representations are selected + document.querySelector("#ngl-nothing").style.display = Object.keys(state.representations).length + ? "none" + : "flex"; + }; + From b32a64772b363ffd1e9a3cd9280c1be4846c824d Mon Sep 17 00:00:00 2001 From: Ziad Al Bkhetan Date: Wed, 2 Oct 2024 16:41:56 +1000 Subject: [PATCH 028/123] lint report --- assets/proteinfold_template.html | 1676 +++++++++++++++--------------- 1 file changed, 838 insertions(+), 838 deletions(-) diff --git a/assets/proteinfold_template.html b/assets/proteinfold_template.html index 17ee85a6..7016d326 100644 --- a/assets/proteinfold_template.html +++ b/assets/proteinfold_template.html @@ -1,270 +1,270 @@ - - - - - Protein structure prediction - - - - - - - - - - -