diff --git a/.editorconfig b/.editorconfig index 72dda289..15d11df6 100644 --- a/.editorconfig +++ b/.editorconfig @@ -31,3 +31,8 @@ indent_size = unset # ignore python and markdown [*.{py,md}] indent_style = unset + +# ignore parameters.md +[parameters.md] +trim_trailing_whitespace = false +indent_style = unset diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 56aa7666..fe701aed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,6 +15,9 @@ concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" cancel-in-progress: true +permissions: + checks: write + jobs: test: name: Run pipeline with test data @@ -25,7 +28,7 @@ jobs: matrix: parameters: - "" - - "--input https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/testdata/samplesheet_multisample_bam.csv --split_fastq 2 --parallel_snv 1 --phaser hiphase_sv" + - "--preset ONT_R10 --input https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet_multisample_bam_ont.csv --parallel_alignments 2 --parallel_snv 1" NXF_VER: - "23.04.0" - "latest-everything" @@ -44,3 +47,61 @@ jobs: - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }} + nftest: + name: ${{ matrix.tags }} ${{ matrix.profile }} NF-${{ matrix.NXF_VER }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + NXF_VER: + - "latest-everything" + - "23.04.0" + tags: + - "SHORT_VARIANT_CALLING" + - "SNV_ANNOTATION" + - "samplesheet" + - "samplesheet_multisample_bam" + profile: + - "docker" + + steps: + - name: Check out pipeline code + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 + with: + version: "${{ matrix.NXF_VER }}" + + - uses: nf-core/setup-nf-test@v1 + + - 
uses: actions/setup-python@v4 + with: + python-version: "3.11" + architecture: "x64" + + - name: Install pdiff to see diff between nf-test snapshots + run: | + python -m pip install --upgrade pip + pip install pdiff + + - name: Run nf-test + run: | + nf-test test --verbose --tag ${{ matrix.tags }} --profile "+${{ matrix.profile }}" --junitxml=test.xml --tap=test.tap + + - uses: pcolby/tap-summary@v1 + with: + path: >- + test.tap + + - name: Output log on failure + if: failure() + run: | + sudo apt install bat > /dev/null + batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/meta/nextflow.log + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: test.xml diff --git a/.gitignore b/.gitignore index 5124c9ac..2eef655b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ results/ testing/ testing* *.pyc +.nf-test* diff --git a/.nf-core.yml b/.nf-core.yml index 168083fe..cb773092 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -2,6 +2,7 @@ lint: files_exist: - CODE_OF_CONDUCT.md - assets/nf-core-nallo_logo_light.png + - docs/README.md - docs/images/nf-core-nallo_logo_light.png - docs/images/nf-core-nallo_logo_dark.png - .github/ISSUE_TEMPLATE/config.yml @@ -11,10 +12,12 @@ lint: files_unchanged: - CODE_OF_CONDUCT.md - assets/nf-core-nallo_logo_light.png + - docs/README.md - docs/images/nf-core-nallo_logo_light.png - docs/images/nf-core-nallo_logo_dark.png - .github/ISSUE_TEMPLATE/bug_report.yml - .github/CONTRIBUTING.md + - .prettierignore multiqc_config: - report_comment nextflow_config: diff --git a/.prettierignore b/.prettierignore index 437d763d..ecbdd5b2 100644 --- a/.prettierignore +++ b/.prettierignore @@ -4,6 +4,7 @@ slackreport.json .nextflow* work/ data/ +docs/parameters.md results/ .DS_Store testing/ diff --git a/CHANGELOG.md b/CHANGELOG.md index cb3b5ec9..d3277ba2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,132 
@@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v0.3.0 - [2024-08-29] + +### `Added` + +- [#230](https://github.com/genomic-medicine-sweden/nallo/pull/230) - Added nf-test to the short variant calling workflow +- [#231](https://github.com/genomic-medicine-sweden/nallo/pull/231) - Added initial tests for ONT data +- [#234](https://github.com/genomic-medicine-sweden/nallo/pull/234) - Added a `--deepvariant_model_type` parameter to override the model type set by `--preset` +- [#239](https://github.com/genomic-medicine-sweden/nallo/pull/239) - Added initial nf-test to the pipeline +- [#243](https://github.com/genomic-medicine-sweden/nallo/pull/243) - Added nf-test to the short variant annotation workflow +- [#245](https://github.com/genomic-medicine-sweden/nallo/pull/245) - Added repeat annotation with Stranger +- [#252](https://github.com/genomic-medicine-sweden/nallo/pull/252) - Added a new `SCATTER_GENOME` subworkflow +- [#255](https://github.com/genomic-medicine-sweden/nallo/pull/255) - Added a new `RANK_VARIANTS` subworkflow to rank SNVs using genmod +- [#261](https://github.com/genomic-medicine-sweden/nallo/pull/261) - Added a `--skip_rank_variants` parameter to skip the rank_variants subworkflow +- [#264](https://github.com/genomic-medicine-sweden/nallo/pull/264) - Added a `project` column to the samplesheet +- [#266](https://github.com/genomic-medicine-sweden/nallo/pull/266) - Added CADD to dynamically calculate indel CADD-scores +- [#270](https://github.com/genomic-medicine-sweden/nallo/pull/270) - Added SNV phasing stats to MultiQC +- [#271](https://github.com/genomic-medicine-sweden/nallo/pull/271) - Added a `--skip_aligned_read_qc` parameter to skip the qc aligned reads subworkflow +- [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Added a `--vep_plugin_files` parameter to separate VEP plugins from cache +-
[#320](https://github.com/genomic-medicine-sweden/nallo/pull/320) - Added complete citations to CITATIONS.md and MultiQC report + +### `Changed` + +- [#232](https://github.com/genomic-medicine-sweden/nallo/pull/232) - Changed to softer `--preset` requirements, non-supported subworkflows can now be explicitly enabled if necessary +- [#232](https://github.com/genomic-medicine-sweden/nallo/pull/232) - Changed `--skip_repeat_wf` to default to true for preset ONT_R10 +- [#233](https://github.com/genomic-medicine-sweden/nallo/pull/233) - Changed the CNV calling workflow to allow calling using ONT data +- [#235](https://github.com/genomic-medicine-sweden/nallo/pull/235) - Changed the ONT_R10 preset to not allow phasing with HiPhase +- [#240](https://github.com/genomic-medicine-sweden/nallo/pull/240) - Reorganize processes in the snv annotation and short variant calling workflows +- [#240](https://github.com/genomic-medicine-sweden/nallo/pull/240) - GLNexus multisample output is now decomposed and normalized +- [#244](https://github.com/genomic-medicine-sweden/nallo/pull/244) - Updated VEP with more annotations +- [#245](https://github.com/genomic-medicine-sweden/nallo/pull/245) - Merged (multisample) repeats from TRGT is now output even if there's only one sample +- [#245](https://github.com/genomic-medicine-sweden/nallo/pull/245) - Split the repeat analysis workflow into one calling and one annotation workflow, `--skip_repeat_wf` becomes `--skip_repeat_calling` and `--skip_repeat_annotation` +- [#246](https://github.com/genomic-medicine-sweden/nallo/pull/246) - Renamed processes and light refactoring of the short variant calling workflow +- [#246](https://github.com/genomic-medicine-sweden/nallo/pull/246) - Use groupKey to remove bottleneck in the short variant calling workflow +- [#247](https://github.com/genomic-medicine-sweden/nallo/pull/247) - Updated nft-bam to 0.3.0 and added BAM reads to snapshot +- [#247](https://github.com/genomic-medicine-sweden/nallo/pull/247) 
- Changed minimap2 preset from `map-ont` to `lr:hq` for `--preset ONT_R10` +- [#250](https://github.com/genomic-medicine-sweden/nallo/pull/250) - Run mosdepth with `--fast-mode` and add to MultiQC report +- [#251](https://github.com/genomic-medicine-sweden/nallo/pull/251) - Switched from annotating single sample VCFs to annotating a multisample VCF, splitting the VCF per sample afterwards to keep outputs almost consistent +- [#256](https://github.com/genomic-medicine-sweden/nallo/pull/256) - Changed Stranger to annotate single-sample VCFs instead of a multi-sample VCF +- [#258](https://github.com/genomic-medicine-sweden/nallo/pull/258) - Updated test profile parameters to speed up tests +- [#260](https://github.com/genomic-medicine-sweden/nallo/pull/260) - Updated DeepVariant to 1.6.1 and htslib (tabix) to 1.20 +- [#261](https://github.com/genomic-medicine-sweden/nallo/pull/261) - Changed SNV annotation to run in parallel +- [#261](https://github.com/genomic-medicine-sweden/nallo/pull/261) - Changed SNV output file names and directory structure +- [#262](https://github.com/genomic-medicine-sweden/nallo/pull/262) - Updated README +- [#264](https://github.com/genomic-medicine-sweden/nallo/pull/264) - Changed PED file creation from groovy script to process +- [#264](https://github.com/genomic-medicine-sweden/nallo/pull/264) - Changed all `multisample` filenames to `{project}` from samplesheet +- [#268](https://github.com/genomic-medicine-sweden/nallo/pull/268) - Only output unphased alignments when phasing is off +- [#268](https://github.com/genomic-medicine-sweden/nallo/pull/268) - Changed alignment output file names and directory structure +- [#270](https://github.com/genomic-medicine-sweden/nallo/pull/270) - Changed whatshap stats to always run, regardless of phasing software, and changed the output from `*.stats.tsv.gz` to `*.stats.tsv` to allow being picked up by MultiQC +- [#277](https://github.com/genomic-medicine-sweden/nallo/pull/277) - Allowed CNV calling as 
soon as SNV calling for a sample is finished +- [#278](https://github.com/genomic-medicine-sweden/nallo/pull/278) - Changed the SNV ranking to run in parallel per region +- [#300](https://github.com/genomic-medicine-sweden/nallo/pull/300) - Clarified and formatted nallo.nf +- [#304](https://github.com/genomic-medicine-sweden/nallo/pull/304) - Changed to treat (u)BAM as the primary input by skipping fastq conversion before aligning +- [#306](https://github.com/genomic-medicine-sweden/nallo/pull/306) - Updated echtvar version +- [#307](https://github.com/genomic-medicine-sweden/nallo/pull/307) - Changed somalier relate to also run per sample on samples with unknown sex, removing the need to wait on all samples to finish alignment before starting variant calling +- [#307](https://github.com/genomic-medicine-sweden/nallo/pull/307) - Changed the removal of n_files from meta from bam_infer_sex to nallo.nf +- [#308](https://github.com/genomic-medicine-sweden/nallo/pull/308) - Updated nf-core modules, fixed warnings in local modules, added Dockerfile to fqcrs +- [#312](https://github.com/genomic-medicine-sweden/nallo/pull/312) - Changed echtvar encode database creation to use dynamic `${project}` from samplesheet +- [#313](https://github.com/genomic-medicine-sweden/nallo/pull/313) - Updated calling of variants in non-autosomal contigs for DeepVariant +- [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Changed VEP annotation added in #244 to not include SpliceAI +- [#317](https://github.com/genomic-medicine-sweden/nallo/pull/317) - Changed so that `--reduced_penetrance` and `--score_config_snv` are required by rank variants and not SNV annotation +- [#318](https://github.com/genomic-medicine-sweden/nallo/pull/318) - Updated docs and schema to clarify pipeline usage +- [#321](https://github.com/genomic-medicine-sweden/nallo/pull/321) - Changed the input to BUILD_INTERVALS to have `meta.id` when building intervals from reference +-
[#323](https://github.com/genomic-medicine-sweden/nallo/pull/323) - Changed `parallel_alignment` to `parallel_alignments` in CI tests as well +- [#330](https://github.com/genomic-medicine-sweden/nallo/pull/330) - Updated README and version bump +- [#332](https://github.com/genomic-medicine-sweden/nallo/pull/332) - Changed the PED file input to genmod to include inferred sex from somalier +- [#333](https://github.com/genomic-medicine-sweden/nallo/pull/333) - Updated TRGT to 0.7.0 and added `meta.id` as output sample name + +### `Removed` + +- [#237](https://github.com/genomic-medicine-sweden/nallo/pull/237) - Removed the CONVERT_ONT_READNAMES module that was run before calling repeats with TRGT +- [#238](https://github.com/genomic-medicine-sweden/nallo/pull/238) - Removed the `--extra_gvcfs` parameter +- [#243](https://github.com/genomic-medicine-sweden/nallo/pull/243) - Removed VEP report from output files +- [#257](https://github.com/genomic-medicine-sweden/nallo/pull/257) - Removed obsolete TODO statements +- [#258](https://github.com/genomic-medicine-sweden/nallo/pull/258) - Removed VCF report from DeepVariant output +- [#264](https://github.com/genomic-medicine-sweden/nallo/pull/264) - Removed the option to provide extra SNF files to Sniffles with `--extra_snfs` +- [#305](https://github.com/genomic-medicine-sweden/nallo/pull/305) - Removed unused local module bcftools view regions +- [#319](https://github.com/genomic-medicine-sweden/nallo/pull/319) - Removed samtools reset before samtools fastq when converting BAM to FASTQ + +### `Fixed` + +- [#231](https://github.com/genomic-medicine-sweden/nallo/pull/231) - Fixed certain tags in input BAM files being transferred over to (re)aligned BAM +- [#252](https://github.com/genomic-medicine-sweden/nallo/pull/252) - Fixed duplicate SNVs in outputs when providing a BED-regions with overlapping regions +- [#267](https://github.com/genomic-medicine-sweden/nallo/pull/267) - Fixed warning where `MODKIT_PILEUP_HAPLOTYPES`
would be defined more than once +- [#300](https://github.com/genomic-medicine-sweden/nallo/pull/300) - Fixed missing paraphase version + +### Parameters + +| Old parameter | New parameter | +| ------------------ | -------------------------- | +| `--skip_repeat_wf` | `--skip_repeat_calling` | +| `--skip_repeat_wf` | `--skip_repeat_annotation` | +| | `--deepvariant_model_type` | +| | `--skip_rank_variants` | +| | `--skip_aligned_read_qc` | +| | `--cadd_resources` | +| | `--cadd_prescored` | +| `--split_fastq` | `--parallel_alignments` | +| `--extra_gvcfs` | | +| `--extra_snfs` | | +| `--dipcall_par` | `--par_regions` | +| | `--vep_plugin_files` | + +> [!NOTE] +> Parameter has been updated if both old and new parameter information is present. +> Parameter has been added if just the new parameter information is present. +> Parameter has been removed if new parameter information isn't present. + +### Module updates + +| Tool | Old version | New version | +| --------------------------- | ----------- | ----------- | +| deepvariant | 1.5.0 | 1.6.1 | +| tabix | 1.19.1 | 1.20 | +| echtvar | 0.1.7 | 0.2.0 | +| somalier | 0.2.15 | 0.2.18 | +| TRGT | 0.4.0 | 0.7.0 | +| cadd | | 1.6.post1 | +| gawk | | 5.3.0 | +| add_most_severe_consequence | | v1.0 | +| add_most_severe_pli | | v1.0 | +| create_pedigree_file | | v1.0 | +| genmod | | 3.8.2 | +| stranger | | 0.9.1 | +| splitubam | | 0.1.1 | +| fastp | 0.23.4 | | + ## v0.2.0 - [2024-06-26] ### `Added` diff --git a/CITATIONS.md b/CITATIONS.md index 0af6937e..811889b7 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,10 +10,78 @@ ## Pipeline tools +- [BCFtools](https://academic.oup.com/gigascience/article/10/2/giab008/6137722) & [SAMtools](https://academic.oup.com/bioinformatics/article/25/16/2078/204688) + + > Danecek P, Bonfield JK, Liddle J, et al. Twelve years of SAMtools and BCFtools. GigaScience. 2021;10(2):giab008. 
doi:10.1093/gigascience/giab008 + +- [BEDTools](https://academic.oup.com/bioinformatics/article/26/6/841/244688) + + > Quinlan AR and Hall IM, 2010. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 26, 6, pp. 841–842. + +- [cramino](https://academic.oup.com/bioinformatics/article/39/5/btad311/7160911) + + > Wouter De Coster, Rosa Rademakers, NanoPack2: population-scale evaluation of long-read sequencing data, Bioinformatics, Volume 39, Issue 5, May 2023, btad311, https://doi.org/10.1093/bioinformatics/btad311 + +- [CADD1](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-021-00835-9), [2](https://academic.oup.com/nar/article/47/D1/D886/5146191) + + > Rentzsch P, Schubach M, Shendure J, Kircher M. CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Med. 2021;13(1):31. doi:10.1186/s13073-021-00835-9 + + > Rentzsch P, Witten D, Cooper GM, Shendure J, Kircher M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research. 2019;47(D1):D886-D894. doi:10.1093/nar/gky1016 + +- [DeepVariant](https://www.nature.com/articles/nbt.4235) + + > Poplin R, Chang PC, Alexander D, et al. A universal SNP and small-indel variant caller using deep neural networks. Nat Biotechnol. 2018;36(10):983-987. doi:10.1038/nbt.4235 + +- [dipcall](https://www.nature.com/articles/s41592-018-0054-7) + + > Li H, Bloom JM, Farjoun Y, Fleharty M, Gauthier L, Neale B, MacArthur D (2018) A synthetic-diploid benchmark for accurate variant-calling evaluation. Nat Methods, 15:595-597. 
[PMID:30013044] + +- [echtvar](https://academic.oup.com/nar/article/51/1/e3/6775383) + + > Brent S Pedersen, Jeroen de Ridder, Echtvar: compressed variant representation for rapid annotation and filtering of SNPs and indels, Nucleic Acids Research, Volume 51, Issue 1, 11 January 2023, Page e3, https://doi.org/10.1093/nar/gkac931 + +- [Ensembl VEP](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0974-4) + + > McLaren W, Gil L, Hunt SE, et al. The Ensembl Variant Effect Predictor. Genome Biol. 2016;17(1):122. doi:10.1186/s13059-016-0974-4 + - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. +- [fqcrs](https://github.com/fellen31/fqcrs) + +- [Genmod](https://github.com/Clinical-Genomics/genmod) + + > Magnusson M, Hughes T, Glabilloy, Bitdeli Chef. genmod: Version 3.7.3. Published online November 15, 2018. doi:10.5281/ZENODO.3841142 + +- [Gfastats](https://academic.oup.com/bioinformatics/article/38/17/4214/6633308) + + > Giulio Formenti, Linelle Abueg, Angelo Brajuka, Nadolina Brajuka, Cristóbal Gallardo-Alba, Alice Giani, Olivier Fedrigo, Erich D Jarvis, Gfastats: conversion, evaluation and manipulation of genome sequences using assembly graphs, Bioinformatics, Volume 38, Issue 17, September 2022, Pages 4214–4216, https://doi.org/10.1093/bioinformatics/btac460 + +- [GLnexus](https://academic.oup.com/bioinformatics/article/36/24/5582/6064144) + + > Yun T, Li H, Chang PC, Lin MF, Carroll A, McLean CY. Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Robinson P, ed. Bioinformatics. 2021;36(24):5582-5589. doi:10.1093/bioinformatics/btaa1081 + +- [hifiasm](https://www.nature.com/articles/s41592-020-01056-5) + + > Cheng, H., Concepcion, G.T., Feng, X. et al. Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm. Nat Methods 18, 170–175 (2021). 
https://doi.org/10.1038/s41592-020-01056-5 + +- [HiPhase](https://academic.oup.com/bioinformatics/article/40/2/btae042/7588891) + + > James M Holt, Christopher T Saunders, William J Rowell, Zev Kronenberg, Aaron M Wenger, Michael Eberle, HiPhase: jointly phasing small, structural, and tandem repeat variants from HiFi sequencing, Bioinformatics, Volume 40, Issue 2, February 2024, btae042, https://doi.org/10.1093/bioinformatics/btae042 + +- [HiFiCNV](https://github.com/PacificBiosciences/HiFiCNV) + +- [minimap2](https://academic.oup.com/bioinformatics/article/34/18/3094/4994778) + + > Heng Li, Minimap2: pairwise alignment for nucleotide sequences, Bioinformatics, Volume 34, Issue 18, September 2018, Pages 3094–3100, https://doi.org/10.1093/bioinformatics/bty191 + +- [modkit](https://github.com/nanoporetech/modkit) + +- [Mosdepth](https://academic.oup.com/bioinformatics/article/34/5/867/4583630?login=true) + + > Pedersen BS, Quinlan AR. Mosdepth: quick coverage calculation for genomes and exomes. Hancock J, ed. Bioinformatics. 2018;34(5):867-868. doi:10.1093/bioinformatics/btx699 + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. @@ -22,6 +90,34 @@ > Genome-wide profiling of highly similar paralogous genes using HiFi sequencing. Xiao Chen, Daniel Baker, Egor Dolzhenko, Joseph M Devaney, Jessica Noya, April S Berlyoung, Rhonda Brandon, Kathleen S Hruska, Lucas Lochovsky, Paul Kruszka, Scott Newman, Emily Farrow, Isabelle Thiffault, Tomi Pastinen, Dalia Kasperaviciute, Christian Gilissen, Lisenka Vissers, Alexander Hoischen, Seth Berger, Eric Vilain, Emmanuèle Délot, UCI Genomics Research to Elucidate the Genetics of Rare Diseases (UCI GREGoR) Consortium, Michael A Eberle. 
bioRxiv 2024.04.19.590294; doi: https://doi.org/10.1101/2024.04.19.590294 +- [Sniffles2](https://www.nature.com/articles/s41587-023-02024-y) + + > Smolka, M., Paulin, L.F., Grochowski, C.M. et al. Detection of mosaic and population-level structural variants with Sniffles2. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-023-02024-y + +- [Somalier](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-020-00761-2) + + > Pedersen, B.S., Bhetariya, P.J., Brown, J. et al. Somalier: rapid relatedness estimation for cancer and germline studies using efficient genome sketches. Genome Med 12, 62 (2020). https://doi.org/10.1186/s13073-020-00761-2 + +- [splitubam](https://github.com/fellen31/splitubam) + +- [stranger](https://github.com/Clinical-Genomics/stranger) + + > Nilsson D, Magnusson M. moonso/stranger v0.7.1. Published online February 18, 2021. doi:10.5281/ZENODO.4548873 + +- [Tabix](https://academic.oup.com/bioinformatics/article/27/5/718/262743) + + > Li H. Tabix: fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics. 2011;27(5):718-719. doi:10.1093/bioinformatics/btq671 + +- [TRGT](https://www.nature.com/articles/s41587-023-02057-3) + + > Dolzhenko, E., English, A., Dashnow, H. et al. Characterization and visualization of tandem repeats at genome scale. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-023-02057-3 + +- [WhatsHap](https://www.biorxiv.org/content/10.1101/085050v2) + + > Marcel Martin, Murray Patterson, Shilpa Garg, Sarah O Fischer, Nadia Pisanti, Gunnar W Klau, Alexander Schöenhuth, Tobias Marschall. 
bioRxiv 085050; doi: https://doi.org/10.1101/085050 + +- [yak](https://github.com/lh3/yak) + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/README.md b/README.md index f095a533..f22658d0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![GitHub Actions CI Status](https://github.com/genomic-medicine-sweden/nallo/actions/workflows/ci.yml/badge.svg)](https://github.com/genomic-medicine-sweden/nallo/actions/workflows/ci.yml) -[![GitHub Actions Linting Status](https://github.com/genomic-medicine-sweden/nallo/actions/workflows/linting.yml/badge.svg)](https://github.com/genomic-medicine-sweden/nallo/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions Linting Status](https://github.com/genomic-medicine-sweden/nallo/actions/workflows/linting.yml/badge.svg)](https://github.com/genomic-medicine-sweden/nallo/actions/workflows/linting.yml) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) @@ -12,11 +12,6 @@ **genomic-medicine-sweden/nallo** is a bioinformatics analysis pipeline for long-read rare disease SV/SNV identification using both PacBio and (targeted) ONT-data. Heavily influenced by best-practice pipelines such as [nf-core/nanoseq](https://github.com/nf-core/nanoseq), [nf-core/sarek](https://nf-co.re/sarek), [nf-core/raredisease](https://nf-co.re/raredisease), [PacBio Human WGS Workflow](https://github.com/PacificBiosciences/pb-human-wgs-workflow-snakemake), [epi2me-labs/wf-human-variation](https://github.com/epi2me-labs/wf-human-variation) and [brentp/rare-disease-wf](https://github.com/brentp/rare-disease-wf). 
- - - - ## Pipeline summary ##### QC @@ -34,24 +29,24 @@ - Short variant calling & joint genotyping of SNVs ([`deepvariant`](https://github.com/google/deepvariant) + [`GLNexus`](https://github.com/dnanexus-rnd/GLnexus)) - SV calling and joint genotyping ([`sniffles2`](https://github.com/fritzsedlazeck/Sniffles)) -- Tandem repeats ([`TRGT`](https://github.com/PacificBiosciences/trgt/tree/main)) +- Tandem repeats (HiFi only) ([`TRGT`](https://github.com/PacificBiosciences/trgt/tree/main)) - Assembly based variant calls (HiFi only) ([`dipcall`](https://github.com/lh3/dipcall)) -- CNV-calling (HiFi only) ([`HiFiCNV`](https://github.com/PacificBiosciences/HiFiCNV)) +- CNV-calling ([`HiFiCNV`](https://github.com/PacificBiosciences/HiFiCNV)) - Call paralogous genes ([`Paraphase`](https://github.com/PacificBiosciences/paraphase)) ##### Phasing and methylation - Phase and haplotag reads ([`whatshap`](https://github.com/whatshap/whatshap) + [`hiphase`](https://github.com/PacificBiosciences/HiPhase)) -- Methylation pileups (Revio/ONT) ([`modkit`](https://github.com/nanoporetech/modkit)) +- Methylation pileups ([`modkit`](https://github.com/nanoporetech/modkit)) -##### Annotation - SNV +##### Annotation -1. Annotate variants with database(s) of choice, i.e. [gnomAD](https://gnomad.broadinstitute.org), [CADD](https://cadd.gs.washington.edu) etc. ([`echtvar`](https://github.com/brentp/echtvar)) -2. Annotate variants ([`VEP`](https://github.com/Ensembl/ensembl-vep)) +- Annotate SNVs and INDELs with database(s) of choice, i.e. [gnomAD](https://gnomad.broadinstitute.org), [CADD](https://cadd.gs.washington.edu) etc. 
([`echtvar`](https://github.com/brentp/echtvar) and [`VEP`](https://github.com/Ensembl/ensembl-vep)) +- Annotate repeat expansions with [stranger](https://github.com/Clinical-Genomics/stranger) -##### Filtering +##### Filtering and ranking -- TBD +- Rank variants ([`GENMOD`](https://github.com/Clinical-Genomics/genmod)) ## Usage @@ -63,9 +58,9 @@ Prepare a samplesheet with input data: `samplesheet.csv` ``` -sample,file,family_id,paternal_id,maternal_id,sex,phenotype -HG002,/path/to/HG002.fastq.gz,FAM1,HG003,HG004,1,1 -HG005,/path/to/HG005.bam,FAM1,HG003,HG004,2,1 +project,sample,file,family_id,paternal_id,maternal_id,sex,phenotype +testrun,HG002,/path/to/HG002.fastq.gz,FAM1,HG003,HG004,1,2 +testrun,HG005,/path/to/HG005.bam,FAM1,HG003,HG004,2,1 ``` Now, you can run the pipeline using: @@ -74,12 +69,8 @@ Now, you can run the pipeline using: nextflow run genomic-medicine-sweden/nallo -profile YOURPROFILE \ --input samplesheet.csv \ --preset \ - --outdir \ --fasta \ - --skip_assembly_wf \ - --skip_repeat_wf \ - --skip_snv_annotation \ - --skip_cnv_calling + --outdir ``` For more details and further functionality, please refer to the [usage documentation](https://github.com/genomic-medicine-sweden/nallo/blob/dev/docs/usage.md). @@ -100,19 +91,12 @@ genomic-medicine-sweden/nallo was originally written by Felix Lenner. We thank the following people for their extensive assistance in the development of this pipeline: - - ## Contributions and Support If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). ## Citations - - - - - This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE). 
> **The nf-core framework for community-curated bioinformatics pipelines.** diff --git a/assets/cadd_to_vcf_header_-1.0-.txt b/assets/cadd_to_vcf_header_-1.0-.txt new file mode 100644 index 00000000..8deee482 --- /dev/null +++ b/assets/cadd_to_vcf_header_-1.0-.txt @@ -0,0 +1 @@ +##INFO= diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index ef18989c..9d78bfcd 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,7 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "genomic-medicine-sweden/nallo Methods Description" section_href: "https://github.com/genomic-medicine-sweden/nallo" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 95eddeb2..c829f8e3 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,6 +1,6 @@ report_comment: > - This report has been generated by the genomic-medicine-sweden/nallo + This report has been generated by the genomic-medicine-sweden/nallo analysis pipeline. report_section_order: diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 6ef9313b..f1af5c06 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ sample,file,family_id,paternal_id,maternal_id,sex,phenotype -sample_1,/path/to/fastq_or_bam/files/sample_1.fastq.gz,FAM,PAT,MAT,0,1 -sample_2,/path/to/fastq_or_bam/files/sample_2.bam,FAM,PAT,MAT,1,1 +sample_1,/path/to/fastq_or_bam/files/sample_1.fastq.gz,FAM,0,0,0,2 +sample_2,/path/to/fastq_or_bam/files/sample_2.bam,FAM,0,0,1,1 diff --git a/assets/schema_gvcfs.json b/assets/schema_gvcfs.json deleted file mode 100644 index 0ae2d250..00000000 --- a/assets/schema_gvcfs.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_gvcfs.json", - "title": "genomic-medicine-sweden/nallo pipeline - params.extra_gvcfs schema", - "description": "Schema for the file provided with params.extra_gvcfs", - "type": "array", - "items": { - "type": "object", - "properties": { - "sample": { - "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces", - "meta": ["id"] - }, - "file": { - "format": "file-path", - "type": "string", - "pattern": "^\\S+\\.(g\\.)?(g)?vcf\\.gz$", - "errorMessage": "gVCF file must be provided, cannot contain spaces and must have extension 'g.vcf.gz' or 'gvcf.gz'" - } - }, - "required": ["sample", "file"] - } -} diff --git a/assets/schema_input.json b/assets/schema_input.json index 269a3085..46c00ba6 100644 --- a/assets/schema_input.json +++ 
b/assets/schema_input.json @@ -28,13 +28,13 @@ "paternal_id": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Paternal ID must be provided and cannot contain spaces. If no paternal ID is available, use any ID not in sample column.", + "errorMessage": "Paternal ID must be provided and cannot contain spaces. If no paternal ID is available, use 0.", "meta": ["paternal_id"] }, "maternal_id": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Maternal ID must be provided and cannot contain spaces. If no maternal ID is available, use any ID not in sample column.", + "errorMessage": "Maternal ID must be provided and cannot contain spaces. If no maternal ID is available, use 0.", "meta": ["maternal_id"] }, "sex": { @@ -48,8 +48,14 @@ "enum": [0, 1, 2], "errorMessage": "Phenoype must be provided as 0 (missing), 1 (unaffected) or 2 (affected)", "meta": ["phenotype"] + }, + "project": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Project name must be provided and cannot contain spaces, needs to be the same for all samples", + "meta": ["project"] } }, - "required": ["sample", "file", "family_id", "paternal_id", "maternal_id", "sex", "phenotype"] + "required": ["sample", "file", "family_id", "paternal_id", "maternal_id", "sex", "phenotype", "project"] } } diff --git a/assets/schema_snfs.json b/assets/schema_snfs.json deleted file mode 100644 index 59d45232..00000000 --- a/assets/schema_snfs.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_snfs.json", - "title": "genomic-medicine-sweden/nallo pipeline - params.extra_snfs schema", - "description": "Schema for the file provided with params.extra_snfs", - "type": "array", - "items": { - "type": "object", - "properties": { - "sample": { - "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces", - 
"meta": ["id"] - }, - "file": { - "format": "file-path", - "type": "string", - "pattern": "^\\S+\\.snf$", - "errorMessage": "SNF file must be provided, cannot contain spaces and must have extension '.snf" - } - }, - "required": ["sample", "file"] - } -} diff --git a/assets/vep_plugin_files_schema.json b/assets/vep_plugin_files_schema.json new file mode 100644 index 00000000..d904317b --- /dev/null +++ b/assets/vep_plugin_files_schema.json @@ -0,0 +1,26 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/vep_plugin_files_schema.json", + "title": "Schema for VEP plugin files and their indices", + "description": "Schema for VEP plugin files and their indices", + "type": "array", + "items": { + "type": "object", + "properties": { + "vep_files": { + "type": "string", + "anyOf": [ + { + "format": "file-path" + }, + { + "format": "directory-path" + } + ], + "exists": true, + "description": "Path to vep plugin files and their indices" + } + }, + "required": ["vep_files"] + } +} diff --git a/bin/add_most_severe_consequence.py b/bin/add_most_severe_consequence.py new file mode 100755 index 00000000..cb1538c7 --- /dev/null +++ b/bin/add_most_severe_consequence.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 + +# Written by Ramprasad Neethiraj and released under the MIT license. +# See git repository (https://github.com/nf-core/raredisease) for full license text. + +import argparse +import gzip +import sys +from pathlib import Path +from typing import Tuple, TextIO + + +def parse_vep_csq_transcripts( + transcripts: list, allele_ind: int, csq_ind: int, hgnc_ind: int, var_csq: list +) -> Tuple[list, list, list, list]: + """ + Parse conseqences for each transcript and return HGNC IDs, alleles, and their severity rank + based on the term's ranking in the ensembl consequences list. 
+ + Args: + transcripts (list): A list of vep transcript annotation + allele_ind (int) : Index of the "allele" in the vep annotation record + csq_ind (int) : Index of the "Consequence" in the vep annotation record + hgnc_ind (int) : Index of the "HGNC_ID" in the vep annotation record + var_csq (list): A list of consequence terms ordered by rank + + Returns: + hgnc_ids (list): list of hgnc ids in the record + alleles (list): list of alleles in the record + consequences (list): list of consequence terms in the record + severity (list): list of consequence term ranks + """ + + consequences = [] + hgnc_ids = [] + severity = [] + alleles = [] + for transcript in transcripts: + vep_fields = transcript.strip().split("|") + # Only the first "&"-separated consequence term of the transcript is used + csq = vep_fields[csq_ind].split("&")[0] + hgnc_id = vep_fields[hgnc_ind] + allele = vep_fields[allele_ind].replace("CSQ=", "") + consequences.append(csq) + hgnc_ids.append(hgnc_id) + # Rank is the term's position in var_csq; index() raises ValueError for a term missing from that list + severity.append(var_csq.index(csq)) + alleles.append(allele) + return hgnc_ids, alleles, consequences, severity + + +def construct_most_severe_consequence_info( + line: str, allele_ind: int, csq_ind: int, hgnc_ind: int, var_csq: list +) -> list: + """ + Find the most severe consequence for each HGNC ID annotated on a vcf record and append it + to the INFO column of the record as a most_severe_consequence tag.
+ + Args: + line (str) : Vcf record + allele_ind (int) : Index of the "allele" in the vep annotation record + csq_ind (int) : Index of the "Consequence" in the vep annotation record + hgnc_ind (int) : Index of the "HGNC_ID" in the vep annotation record + var_csq (list): A list of consequence terms ordered by rank + + Returns: + columns (list): A list of fields in the vcf record with most severe consequence added + to the INFO column + """ + + columns = line.strip().split() + info_fields = columns[7].split(";") + for field in info_fields: + if field.startswith("CSQ="): + transcripts = field.split("CSQ=")[1].split(",") + hgnc_ids, alleles, consequences, severity = parse_vep_csq_transcripts( + transcripts, allele_ind, csq_ind, hgnc_ind, var_csq + ) + unique_ids = list(set(hgnc_ids)) + mscsq_anno = [] + for gene_id in unique_ids: + if gene_id != "": + indices = find_indices(hgnc_ids, gene_id) + alleles_sub = [alleles[i] for i in indices] + consequences_sub = [consequences[i] for i in indices] + severity_sub = [severity[i] for i in indices] + most_severe_csq = consequences_sub[severity_sub.index(min(severity_sub))] + most_severe_allele = alleles_sub[severity_sub.index(min(severity_sub))] + mscsq_anno.append(gene_id + ":" + most_severe_allele + "|" + most_severe_csq) + if mscsq_anno: + columns[7] += ";most_severe_consequence=" + ",".join(mscsq_anno) + return columns + + +def find_indices(list_to_check: list, item_to_find: str) -> list: + """ + Get indices of an element in a list + + Args: + list_to_check (list) + item_to_find (value) + + Returns: + indices (list) + """ + indices = [] + for idx, value in enumerate(list_to_check): + if value == item_to_find: + indices.append(idx) + return indices + + +def parse_vep_csq_schema(line: str) -> Tuple[int, int, int]: + """ + Get indices of allele, consequence, and hgnc id in the annotation + + Args: + line: INFO line in the vcf header with CSQ information + + Returns: + allele_ind (int) : Index of the "allele" in the vep 
annotation record + csq_ind (int) : Index of the "Consequence" in the vep annotation record + hgnc_ind (int) : Index of the "HGNC_ID" in the vep annotation record + """ + fields = line.strip().split("Format: ")[1].replace('">', "").split("|") + allele_ind = fields.index("Allele") + csq_ind = fields.index("Consequence") + hgnc_ind = fields.index("HGNC_ID") + + return allele_ind, csq_ind, hgnc_ind + + +def write_csq_annotated_vcf(file_in: TextIO, file_out: TextIO, var_csq: list): + """Add most severe consequence field to record, and write the record to a vcf file""" + for line in file_in: + if line.startswith("#"): + file_out.write(line) + # The CSQ header line carries the field layout needed to parse every record that follows + # NOTE(review): this condition and the header line below were reconstructed after extraction damage; confirm Number/Type/Description against upstream + if line.startswith("##INFO=<ID=CSQ") and "Format: " in line: + allele_ind, csq_ind, hgnc_ind = parse_vep_csq_schema(line) + file_out.write( + '##INFO=<ID=most_severe_consequence,Number=.,Type=String,Description="Most severe genomic consequence.">\n' + ) + else: + mscsq = construct_most_severe_consequence_info(line, allele_ind, csq_ind, hgnc_ind, var_csq) + file_out.write("\t".join(mscsq) + "\n") + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Annotate vcf with the most severe consequence field.", + epilog="Example: python add_most_severe_consequence.py --file_in vep.vcf --file_out vep.most_severe_csq.vcf --variant_csq variant_consequence.txt", + ) + parser.add_argument( + "--file_in", + metavar="FILE_IN", + type=Path, + help="Vcf file annotated with vep.", + ) + parser.add_argument( + "--file_out", + metavar="FILE_OUT", + type=Path, + help="Vcf with most_severe_consequence annotations added to it.", + ) + parser.add_argument( + "--variant_csq", + metavar="VARIANT_CSQ", + type=Path, + help="Variant consequences ranked by severity", + ) + return parser.parse_args(argv) + + +def main(argv=None): + """Coordinate argument parsing and program execution.""" + args = parse_args(argv) + if not args.file_in.is_file(): + print(f"The given input file {args.file_in} was not found!") + sys.exit(2) + if not args.variant_csq.is_file(): + print(f"The given variant consequence file {args.variant_csq} was not found!") + sys.exit(2) + args.file_out.parent.mkdir(parents=True, exist_ok=True)
+ with open(args.variant_csq) as f: + var_csq = [line.strip() for line in f] + opener = gzip.open if (args.file_in.suffix == ".gz") else open + with open(args.file_out, "w") as out_vcf: + with opener(args.file_in, "rt") as in_vcf: + write_csq_annotated_vcf(in_vcf, out_vcf, var_csq) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/add_most_severe_pli.py b/bin/add_most_severe_pli.py new file mode 100755 index 00000000..b578fe28 --- /dev/null +++ b/bin/add_most_severe_pli.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 + +# Written by Ramprasad Neethiraj and released under the MIT license. +# See git repository (https://github.com/nf-core/raredisease) for full license text. + +import argparse +import gzip +import sys +from pathlib import Path +from typing import TextIO + + +def parse_vep_transcripts(transcripts: list, pli_ind: int) -> list: + """ + Parse each transcript and return a list of pli values. + + Args: + transcripts (list): A list of vep transcript annotation + pli_ind (int) : Index of pli value in the vep annotation record + + Returns: + pli_values (list): list of pli values in the record + """ + + pli_values = [] + for transcript in transcripts: + vep_fields = transcript.strip().split("|") + pli_value = vep_fields[pli_ind] + pli_values.append(pli_value) + return pli_values + + +def construct_most_severe_pli_info(line: str, pli_ind: int) -> list: + """ + Parse gene symbols, find the highest pli value of all gene symbols, add most_severe_pli tag to the info + field and return a list of modified columns + + Args: + line (str) : Vcf record + pli_ind (int) : Index of pli value in the vep annotation record + + Returns: + columns (list): A list of fields in the vcf record with most severe pli added + to the INFO column + """ + + columns = line.strip().split() + info_fields = columns[7].split(";") + for field in info_fields: + if field.startswith("CSQ="): + transcripts = field.split("CSQ=")[1].split(",") + break + pli_values = 
parse_vep_transcripts(transcripts, pli_ind) + try: + # Compare numerically: max() over the raw strings is lexicographic, so "9.1e-05" would beat "0.99" + # Empty annotations are skipped; if no usable value remains the record is left unchanged + pli_max = max((value for value in pli_values if value), key=float) + except ValueError: + pli_max = "" + if pli_max: + columns[7] += ";most_severe_pli={:.2f}".format(float(pli_max)) + return columns + + +def parse_vep_csq_schema(line: str) -> int: + """ + Get the index of the pLI value in the annotation + + Args: + line: INFO line in the vcf header with CSQ information + + Returns: + pli_ind (int) : Index of pli value in the vep annotation record + """ + fields = line.strip().split("Format: ")[1].replace('">', "").split("|") + pli_ind = fields.index("pLI_gene_value") + + return pli_ind + + +def write_pli_annotated_vcf(file_in: TextIO, file_out: TextIO): + """Add most severe pli field to record, and write the record to a vcf file""" + for line in file_in: + if line.startswith("#"): + file_out.write(line) + # The CSQ header line carries the field layout needed to parse every record that follows + # NOTE(review): this condition and the header line below were reconstructed after extraction damage; confirm Number/Type/Description against upstream + if line.startswith("##INFO=<ID=CSQ") and "Format: " in line: + pli_ind = parse_vep_csq_schema(line) + file_out.write( + '##INFO=<ID=most_severe_pli,Number=1,Type=Float,Description="Most severe pLI score.">\n' + ) + else: + vcf_record = construct_most_severe_pli_info(line, pli_ind) + file_out.write("\t".join(vcf_record) + "\n") + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Annotate vcf with the most severe pli field.", + epilog="Example: python add_most_severe_pli.py --file_in vep.vcf --file_out vep.most_severe_pli.vcf", + ) + parser.add_argument( + "--file_in", + metavar="FILE_IN", + type=Path, + help="Vcf file annotated with vep's pli plugin.", + ) + parser.add_argument( + "--file_out", + metavar="FILE_OUT", + type=Path, + help="Vcf with most_severe_pli annotations added to it.", + ) + return parser.parse_args(argv) + + +def main(argv=None): + """Coordinate argument parsing and program execution.""" + args = parse_args(argv) + if not args.file_in.is_file(): + print(f"The given input file {args.file_in} was not found!") + sys.exit(2) + args.file_out.parent.mkdir(parents=True, exist_ok=True) + opener = gzip.open if (args.file_in.suffix == ".gz") else open + with open(args.file_out, "w") as out_vcf: + with opener(args.file_in, "rt") as in_vcf: +
write_pli_annotated_vcf(in_vcf, out_vcf) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config new file mode 100644 index 00000000..1644627f --- /dev/null +++ b/conf/modules/annotate_cadd.config @@ -0,0 +1,66 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// CADD annotation +// + +process { + + withName: '.*:ANNOTATE_CADD:.*' { + publishDir = [ + enabled: false + ] + } + + withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' { + ext.args = [ + '--output-type z', + '--types indels,other' + ].join(' ') + ext.prefix = { "${vcf.simpleName}_indels" } + } + + withName: '.*:ANNOTATE_CADD:CADD' { + ext.prefix = { "${vcf.simpleName}_cadd" } + } + + withName: '.*:ANNOTATE_CADD:TABIX_CADD' { + ext.args = { "--force --sequence 1 --begin 2 --end 2" } + } + + withName: '.*:ANNOTATE_CADD:ANNOTATE_INDELS' { + ext.args = [ + '--columns Chrom,Pos,Ref,Alt,-,CADD', + '--output-type z', + '--write-index=tbi' + ].join(' ') + ext.prefix = { "${input.simpleName}_ann" } + } + + withName: '.*:ANNOTATE_CADD:REFERENCE_TO_CADD_CHRNAMES' { + ext.args2 = '\'{original=$1; sub("chr","",$1); print original, $1}\'' + ext.prefix = "reference_to_cadd" + ext.suffix = "txt" + } + + withName: '.*:ANNOTATE_CADD:CADD_TO_REFERENCE_CHRNAMES' { + ext.args2 = 
'\'{original=$1; sub("chr","",$1); print $1, original}\'' + ext.prefix = "cadd_to_reference" + ext.suffix = "txt" + } + + withName: '.*:ANNOTATE_CADD:RENAME_CHRNAMES' { + ext.args = '--output-type z' + } +} diff --git a/conf/modules/annotate_consequence_pli.config b/conf/modules/annotate_consequence_pli.config new file mode 100644 index 00000000..3158c4f4 --- /dev/null +++ b/conf/modules/annotate_consequence_pli.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +process { + withName: '.*:ANN_CSQ_PLI_SNV:.*' { + publishDir = [ + enabled: false + ] + } + + withName: '.*ANN_CSQ_PLI_SNV:ADD_MOST_SEVERE_CSQ' { + ext.prefix = { "${meta.id}_snv_csq" } + } + + withName: '.*ANN_CSQ_PLI_SNV:ADD_MOST_SEVERE_PLI' { + ext.prefix = { "${meta.id}_snv_csq_pli" } + } + + withName: '.*ANN_CSQ_PLI_SNV:TABIX_BGZIPTABIX' { + ext.prefix = { "${meta.id}_snv_csq_pli" } + } +} diff --git a/conf/modules/annotate_repeat_expansions.config b/conf/modules/annotate_repeat_expansions.config new file mode 100644 index 00000000..93538810 --- /dev/null +++ b/conf/modules/annotate_repeat_expansions.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + withName: '.*:ANNOTATE_REPEAT_EXPANSIONS:.*' { + publishDir = [ + enabled: false + ] + } + withName: '.*ANNOTATE_REPEAT_EXPANSIONS:STRANGER' { + ext.prefix = { "${meta.id}_stranger" } + ext.args = '--trgt' + } + + withName: '.*:ANNOTATE_REPEAT_EXPANSIONS:COMPRESS_STRANGER' { + ext.prefix = { "${meta.id}_repeat_expansion_stranger" } + ext.args = [ + '--output-type z', + '--write-index=tbi' + ].join(' ') + publishDir = [ + path: { "${params.outdir}/repeat_annotation/stranger/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/bam_infer_sex.config b/conf/modules/bam_infer_sex.config index fc2c91ad..d247a841 100644 --- a/conf/modules/bam_infer_sex.config +++ b/conf/modules/bam_infer_sex.config @@ -24,7 +24,11 @@ process { ] } - withName: '.*:BAM_INFER_SEX:SOMALIER_RELATE' { + withName: '.*:BAM_INFER_SEX:RELATE_INFER' { + ext.args = '--infer' + } + + withName: '.*:BAM_INFER_SEX:RELATE_RELATE' { ext.args = '--infer' publishDir = [ path: { "${params.outdir}/qc_aligned_reads/somalier/relate/${meta.id}" }, diff --git a/conf/modules/repeat_calling.config b/conf/modules/call_repeat_expansions.config similarity index 83% rename from conf/modules/repeat_calling.config rename to conf/modules/call_repeat_expansions.config index 99cb5e58..d46150df 100644 --- a/conf/modules/repeat_calling.config +++ b/conf/modules/call_repeat_expansions.config @@ -18,16 +18,18 @@ process { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - withName: '.*:REPEAT_ANALYSIS:.*' { + withName: '.*:CALL_REPEAT_EXPANSIONS:.*' { publishDir = [ enabled: false, ] } - withName: '.*:REPEAT_ANALYSIS:SAMTOOLS_SORT_TRGT' { + withName: '.*:CALL_REPEAT_EXPANSIONS:TRGT' { + ext.args = { "--sample-name ${meta.id}" } + } + withName: 
'.*:CALL_REPEAT_EXPANSIONS:SAMTOOLS_SORT_TRGT' { ext.prefix = { "${meta.id}_spanning_sorted" } - publishDir = [ path: { "${params.outdir}/repeat_calling/trgt/single_sample/${meta.id}" }, mode: params.publish_dir_mode, @@ -35,8 +37,7 @@ process { ] } - withName: '.*:REPEAT_ANALYSIS:SAMTOOLS_INDEX_TRGT' { - + withName: '.*:CALL_REPEAT_EXPANSIONS:SAMTOOLS_INDEX_TRGT' { publishDir = [ path: { "${params.outdir}/repeat_calling/trgt/single_sample/${meta.id}" }, mode: params.publish_dir_mode, @@ -44,14 +45,12 @@ process { ] } - withName: '.*:REPEAT_ANALYSIS:BCFTOOLS_SORT_TRGT' { - + withName: '.*:CALL_REPEAT_EXPANSIONS:BCFTOOLS_SORT_TRGT' { ext.prefix = { "${meta.id}_sorted" } ext.args = [ '--output-type z', '--write-index=tbi' ].join(' ') - publishDir = [ path: { "${params.outdir}/repeat_calling/trgt/single_sample/${meta.id}" }, mode: params.publish_dir_mode, @@ -59,13 +58,12 @@ process { ] } - withName: '.*:REPEAT_ANALYSIS:BCFTOOLS_MERGE' { - + withName: '.*:CALL_REPEAT_EXPANSIONS:BCFTOOLS_MERGE' { ext.args = [ '--output-type z', - '--write-index=tbi' + '--write-index=tbi', + '--force-single' ].join(' ') - publishDir = [ path: { "${params.outdir}/repeat_calling/trgt/multi_sample/${meta.id}" }, mode: params.publish_dir_mode, @@ -73,10 +71,8 @@ process { ] } - withName: '.*:REPEAT_ANALYSIS:BCFTOOLS_INDEX_MERGE' { - + withName: '.*:CALL_REPEAT_EXPANSIONS:BCFTOOLS_INDEX_MERGE' { ext.args = '--tbi' - publishDir = [ path: { "${params.outdir}/repeat_calling/trgt/multi_sample/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/conf/modules/bam_to_fastq.config b/conf/modules/convert_input_files.config similarity index 78% rename from conf/modules/bam_to_fastq.config rename to conf/modules/convert_input_files.config index dcdabb6b..4d0dccfb 100644 --- a/conf/modules/bam_to_fastq.config +++ b/conf/modules/convert_input_files.config @@ -18,19 +18,18 @@ process { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - withName: 
'.*:BAM_TO_FASTQ:.*' { + withName: '.*:CONVERT_INPUT_FILES:.*' { publishDir = [ enabled: false, ] } - withName: '.*:BAM_TO_FASTQ:SAMTOOLS_FASTQ' { - - // Maybe should only allow unmapped data - // Unsure why SA tag is still there after reset - ext.args = '-x SA' // samtools reset - ext.args2 = '-T \\*' // samtools fastq - + withName: '.*:CONVERT_INPUT_FILES:SAMTOOLS_FASTQ' { ext.prefix = { "${input}" } + ext.args = '-T \\*' + } + + withName: '.*:CONVERT_INPUT_FILES:SAMTOOLS_IMPORT' { + ext.args = '-T \\*' } } diff --git a/conf/modules/general.config b/conf/modules/general.config index b7038fe4..3f5ad9f1 100644 --- a/conf/modules/general.config +++ b/conf/modules/general.config @@ -24,11 +24,53 @@ process { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - withName: '.*:NALLO:FASTQC' { - ext.args = '--quiet' + withName: '.*:NALLO:BCFTOOLS_STATS' { + ext.prefix = { "${vcf}" } + publishDir = [ + path: { "${params.outdir}/snvs/stats/single_sample" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - ext.prefix = { "${reads}" } + withName: '.*:NALLO:BCFTOOLS_CONCAT' { + ext.prefix = { params.skip_snv_annotation ? "${meta.id}_snv" : (params.skip_rank_variants ? "${meta.id}_snv_annotated" : "${meta.id}_snv_annotated_ranked") } + ext.args = { [ + '--allow-overlaps', + '--output-type z', + '--write-index=tbi' + ].join(' ') } + publishDir = [ + path: { "${params.outdir}/snvs/multi_sample/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: '.*:NALLO:BCFTOOLS_PLUGINSPLIT' { + ext.args = [ + '--output-type z', + '--write-index=tbi' + ].join(' ') + publishDir = [ + path: { "${params.outdir}/snvs/single_sample/" }, + mode: params.publish_dir_mode, + // Can't use prefix as it would come from the original file + saveAs: { filename -> + if (filename.equals('versions.yml')) { + null + } else { + def matcher = filename =~ /(.+)(\.vcf\.gz(?:\.tbi)?)$/ + def sample = matcher[0][1] + def extension = matcher[0][2] + def annotated = params.skip_snv_annotation ? "" : (params.skip_rank_variants ? "_annotated" : "_annotated_ranked" ) + "${sample}/${sample}_snv${annotated}${extension}" + } + } + ] + } + withName: '.*:NALLO:FASTQC' { + ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/qc_raw_reads/fastqc/${meta.id}" }, mode: params.publish_dir_mode, @@ -37,9 +79,7 @@ process { } withName: '.*:NALLO:FQCRS' { - ext.prefix = { "${reads}" } - publishDir = [ path: { "${params.outdir}/qc_raw_reads/fqcrs/${meta.id}" }, mode: params.publish_dir_mode, @@ -53,6 +93,19 @@ process { ] } + withName: '.*:NALLO:SAMPLESHEET_PED' { + publishDir = [ + enabled: false + ] + } + + withName: '.*:NALLO:SOMALIER_PED' { + publishDir = [ + path: { "${params.outdir}/pedigree" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + withName: '.*:NALLO:SPLIT_BED_CHUNKS' { publishDir = [ enabled: false @@ -65,51 +118,53 @@ process { ] } - withName: '.*:NALLO:FASTP' { - - ext.prefix = { "${reads.simpleName}" } - - ext.args = { [ - '--disable_adapter_trimming', - '--disable_quality_filtering', - "--split ${params.split_fastq}" - ].join(' ').trim() } - + withName: '.*:NALLO:SPLITUBAM' { + ext.args = { "--split ${params.parallel_alignments}" } publishDir = [ enabled: false ] } withName: '.*:NALLO:MINIMAP2_ALIGN' { - + ext.prefix = { "${meta.id}_aligned" } ext.args = { [ "-y", - params.preset.equals('ONT_R10') ? 
"-x map-ont" : "-x map-hifi", + params.preset.equals('ONT_R10') ? "-x lr:hq" : "-x map-hifi", "--secondary=no", "-Y", "-R @RG\\\\tID:${meta.id}\\\\tSM:${meta.id}" ].join(' ') } - + ext.args3 = { [ + '-x HP,PS', // phasing + '-x AS,CC,CG,CP,H1,H2,HI,H0,IH,MC,MD,MQ,NM,SA,TS' + ].join(' ') } + ext.args4 = '-T \\*' publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/aligned_reads/minimap2/${meta.id}" }, + path: { "${params.outdir}/aligned_reads/${meta.id}" }, // only a single BAM file per sample saveAs: { filename -> - filename.equals('versions.yml') ? null : (meta.n_files == 1 ? filename : null) + filename.equals('versions.yml') ? null : (meta.n_files == 1 && params.skip_phasing_wf ? filename : null) } ] } withName: '.*:NALLO:SAMTOOLS_MERGE' { - ext.args = '--write-index' + ext.prefix = { "${meta.id}_aligned" } + publishDir = [ + path: { "${params.outdir}/aligned_reads/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : (params.skip_phasing_wf ? filename : null) } + ] + } + withName: '.*:NALLO:ECHTVAR_ENCODE' { publishDir = [ - path: { "${params.outdir}/aligned_reads/minimap2/${meta.id}" }, + path: { "${params.outdir}/databases/echtvar/encode/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] - } /* diff --git a/conf/modules/methylation.config b/conf/modules/methylation.config index 251f293a..2101bd30 100644 --- a/conf/modules/methylation.config +++ b/conf/modules/methylation.config @@ -24,7 +24,7 @@ process { ] } - withName: '.*:METHYLATION:MODKIT_PILEUP' { + withName: '.*:METHYLATION:MODKIT_PILEUP_UNPHASED' { ext.args = '--combine-mods --cpg --combine-strands' ext.prefix = { "${meta.id}_modkit_pileup" } @@ -36,7 +36,7 @@ process { ] } - withName: '.*:METHYLATION:MODKIT_PILEUP_HAPLOTYPES' { + withName: '.*:METHYLATION:MODKIT_PILEUP_PHASED' { ext.args = '--combine-mods --cpg --combine-strands --partition-tag HP' ext.prefix = { "${meta.id}_modkit_pileup_phased" } @@ -49,7 +49,7 @@ process { } - withName: '.*:METHYLATION:BGZIP_MODKIT_PILEUP' { + withName: '.*:METHYLATION:BGZIP_MODKIT_PILEUP_UNPHASED' { ext.prefix = { "${input.simpleName}" } @@ -60,7 +60,7 @@ process { ] } - withName: '.*:METHYLATION:BGZIP_MODKIT_PILEUP_HAPLOTYPES' { + withName: '.*:METHYLATION:BGZIP_MODKIT_PILEUP_PHASED' { ext.prefix = { "${input.simpleName}" } diff --git a/conf/modules/phasing.config b/conf/modules/phasing.config index acdc69bb..e747b420 100644 --- a/conf/modules/phasing.config +++ b/conf/modules/phasing.config @@ -25,46 +25,42 @@ process { } withName: '.*:PHASING:HIPHASE_SNV' { - + ext.prefix = { "${meta.id}_phased" } ext.args = { [ '--ignore-read-groups', - "--stats-file ${meta.id}.stats.tsv", - "--blocks-file ${meta.id}.blocks.tsv", - "--summary-file ${meta.id}.summary.tsv" + "--stats-file ${meta.id}_phased.stats.tsv", + "--blocks-file ${meta.id}_phased.blocks.tsv", + "--summary-file ${meta.id}_phased.summary.tsv" ].join(' ') } publishDir = [ - path: { "${params.outdir}/phasing/hiphase/snv/${meta.id}" }, + path: { "${params.outdir}/" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ?
null : ((filename.endsWith('bam') || filename.endsWith('bai')) ? "aligned_reads/${meta.id}/${filename}" : "phasing/hiphase/snv/${meta.id}/${filename}" ) } ] } withName: '.*:PHASING:HIPHASE_SV' { - + ext.prefix = { "${meta.id}_phased" } ext.args = { [ '--ignore-read-groups', - "--stats-file ${meta.id}.stats.tsv", - "--blocks-file ${meta.id}.blocks.tsv", - "--summary-file ${meta.id}.summary.tsv" + "--stats-file ${meta.id}_phased.stats.tsv", + "--blocks-file ${meta.id}_phased.blocks.tsv", + "--summary-file ${meta.id}_phased.summary.tsv" ].join(' ') } - publishDir = [ - path: { "${params.outdir}/phasing/hiphase/sv/${meta.id}" }, + path: { "${params.outdir}/" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : ((filename.endsWith('bam') || filename.endsWith('bai')) ? "aligned_reads/${meta.id}/${filename}" : "phasing/hiphase/sv/${meta.id}/${filename}" ) } ] } withName: '.*:PHASING:WHATSHAP_PHASE' { - ext.prefix = { "${meta.id}_phased" } - ext.args = [ '--ignore-read-groups', '--indels' ].join(' ') - publishDir = [ path: { "${params.outdir}/phasing/whatshap/phase/${meta.id}" }, mode: params.publish_dir_mode, @@ -73,9 +69,7 @@ process { } withName: '.*:PHASING:WHATSHAP_STATS' { - ext.prefix = { "${meta.id}_stats" } - publishDir = [ path: { "${params.outdir}/phasing/whatshap/stats/${meta.id}" }, mode: params.publish_dir_mode, @@ -84,45 +78,37 @@ process { } withName: '.*:PHASING:WHATSHAP_HAPLOTAG' { - - ext.prefix = { "${meta.id}_haplotagged" } - + ext.prefix = { "${meta.id}_phased" } ext.args = [ '--ignore-read-groups', '--tag-supplementary' ].join(' ') - publishDir = [ - path: { "${params.outdir}/phasing/whatshap/haplotag/${meta.id}" }, + path: { "${params.outdir}/aligned_reads/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ?
null : filename } ] } withName: '.*:PHASING:SAMTOOLS_INDEX_WHATSHAP' { - publishDir = [ - path: { "${params.outdir}/phasing/whatshap/haplotag/${meta.id}" }, + path: { "${params.outdir}/aligned_reads/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: '.*:PHASING:CRAMINO_PHASED' { - ext.args = [ '--karyotype', '--phased' ].join(' ') - ext.prefix = { "${meta.id}_cramino_aligned_phased" } - publishDir = [ path: { "${params.outdir}/qc_aligned_reads/cramino/phased/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - } } diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config index dca2bbb3..412d7b75 100644 --- a/conf/modules/prepare_genome.config +++ b/conf/modules/prepare_genome.config @@ -17,6 +17,7 @@ process { enabled: false, ] } + withName: '.*:PREPARE_GENOME:MINIMAP2_INDEX' { if(params.preset == 'revio' | params.preset == 'pacbio') { ext.args = "-x map-hifi" diff --git a/conf/modules/qc.config b/conf/modules/qc_aligned_reads.config similarity index 87% rename from conf/modules/qc.config rename to conf/modules/qc_aligned_reads.config index e47eab2b..dda307bb 100644 --- a/conf/modules/qc.config +++ b/conf/modules/qc_aligned_reads.config @@ -14,15 +14,13 @@ process { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - QC + QC aligned reads ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ withName: '.*:QC_ALIGNED_READS:CRAMINO' { - ext.args = '--karyotype' ext.prefix = { "${meta.id}_cramino_aligned" } - publishDir = [ path: { "${params.outdir}/qc_aligned_reads/cramino/unphased/${meta.id}" }, mode: params.publish_dir_mode, @@ -31,14 +29,11 @@ process { } withName: '.*:QC_ALIGNED_READS:MOSDEPTH' { - - // If pipeline is run with bed-file, then don't run wth '--by 500' - if(params.bed) { - ext.args = 
"--no-per-base" - } else { - ext.args = "--by 500 --no-per-base" - } - + ext.args = { [ + '--fast-mode', + '--no-per-base', + params.bed ? '' : '--by 500' + ].join(' ') } publishDir = [ path: { "${params.outdir}/qc_aligned_reads/mosdepth/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/conf/modules/rank_variants.config b/conf/modules/rank_variants.config new file mode 100644 index 00000000..d253ffa2 --- /dev/null +++ b/conf/modules/rank_variants.config @@ -0,0 +1,54 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Score and rank SNVs +// + +process { + + withName: '.*:RANK_VARIANTS_SNV:.*' { + publishDir = [ + enabled: false, + ] + } + + withName: '.*:RANK_VARIANTS_SNV:GENMOD_ANNOTATE' { + ext.prefix = { "${meta.id}_snv_genmod_annotate" } + ext.args = { [ + '--annotate_regions', + '--genome-build 38', + '--temp_dir ./' + ].join(' ') } + } + + withName: '.*:RANK_VARIANTS_SNV:GENMOD_MODELS' { + ext.prefix = { "${meta.id}_snv_genmod_models" } + ext.args = "--whole_gene --temp_dir ./" + } + + withName: '.*:RANK_VARIANTS_SNV:GENMOD_SCORE' { + ext.prefix = { "${meta.id}_snv_genmod_score" } + ext.args = "--rank_results" + } + + withName: '.*:RANK_VARIANTS_SNV:GENMOD_COMPOUND' { + ext.prefix = { "${meta.id}_snv_genmod_compound" } + ext.args = "--temp_dir ./" + } + + withName: '.*:RANK_VARIANTS_SNV:BCFTOOLS_SORT' { + ext.when = false + } + +} diff --git a/conf/modules/scatter_genome.config b/conf/modules/scatter_genome.config new file mode 100644 index 00000000..6e615a4f --- /dev/null +++ b/conf/modules/scatter_genome.config @@ -0,0 +1,29 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + withName: '.*:SCATTER_GENOME:.*' { + publishDir = [ + enabled: false, + ] + } + + withName: '.*:SCATTER_GENOME:BEDTOOLS_SORT' { + ext.prefix = { "${meta.id}_sorted"} + } + + withName: '.*:SCATTER_GENOME:BEDTOOLS_MERGE' { + ext.prefix = { "${meta.id}_merged"} + } + +} diff --git a/conf/modules/short_variant_calling.config b/conf/modules/short_variant_calling.config index e909d6e2..2559d947 100644 --- a/conf/modules/short_variant_calling.config +++ b/conf/modules/short_variant_calling.config @@ -25,79 +25,43 @@ process { } withName: '.*:SHORT_VARIANT_CALLING:DEEPVARIANT' { - - ext.prefix = { intervals ? "${meta.id}" + "_" + "${intervals.getSimpleName()}" + "_deepvariant" : "${meta.id}" + "_deepvariant"} - - if(params.preset.matches('revio|pacbio')) { - ext.args = { [ - "--sample_name=${meta.id}", - '--model_type=PACBIO', - ].join(' ') } - - } else if(params.preset.matches('ONT_R10')) { - ext.args = { [ - "--sample_name=${meta.id}", - '--model_type=ONT_R104', - ].join(' ') } - } + ext.prefix = { intervals ? "${meta.id}_${intervals}_deepvariant" : "${meta.id}_deepvariant" } + ext.args = { [ + "--sample_name=${meta.id}", + "--model_type=${params.deepvariant_model_type}", + '-vcf_stats_report=False', + meta.sex == 1 ? '--haploid_contigs="chrX,chrY"' : '' + ].join(' ') } } withName: '.*:SHORT_VARIANT_CALLING:GLNEXUS' { ext.args = '--config DeepVariant_unfiltered' - publishDir = [ - path: { "${params.outdir}/snv_calling/multi_sample/glnexus/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: '.*:SHORT_VARIANT_CALLING:BCFTOOLS_CONCAT_DV' { - - ext.args = '--allow-overlaps' - } - withName: '.*:SHORT_VARIANT_CALLING:BCFTOOLS_CONCAT_DV_VCF' { - + withName: '.*:SHORT_VARIANT_CALLING:BCFTOOLS_CONCAT' { ext.args = '--allow-overlaps' - } - withName: '.*:SHORT_VARIANT_CALLING:BCFTOOLS_SORT_DV' { - ext.prefix = { "${vcf.simpleName}_sorted.g" } + withName: '.*:SHORT_VARIANT_CALLING:BCFTOOLS_NORM_SINGLESAMPLE' { + ext.prefix = { "${meta.id}_deepvariant_snv" } ext.args = [ - '--output-type z', - '--write-index=tbi' - ].join(' ') - - publishDir = [ - path: { "${params.outdir}/snv_calling/single_sample/deepvariant/gvcf/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + '-m -', + '-w 10000', + '--output-type u', + ].join(' ') } - withName: '.*:SHORT_VARIANT_CALLING:BCFTOOLS_SORT_DV_VCF' { - ext.prefix = { "${vcf.simpleName}_sorted" } - ext.args = [ - '--output-type z', - '--write-index=tbi' - ].join(' ') - - publishDir = [ - path: { "${params.outdir}/snv_calling/single_sample/deepvariant/vcf/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + withName: '.*:SHORT_VARIANT_CALLING:BCFTOOLS_FILLTAGS' { + ext.prefix = { "${meta.id}_ac" } + ext.args = '--output-type u' } - withName: '.*:SHORT_VARIANT_CALLING:TABIX_GLNEXUS' { - - publishDir = [ - path: { "${params.outdir}/snv_calling/multi_sample/glnexus/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] + withName: '.*:SHORT_VARIANT_CALLING:BCFTOOLS_NORM_MULTISAMPLE' { + ext.args = [ + '-m -', + '--output-type u', + '--write-index=csi', + '-w 10000' + ].join(' ') } - } diff --git a/conf/modules/snv_annotation.config b/conf/modules/snv_annotation.config index c977f129..36be4406 100644 --- a/conf/modules/snv_annotation.config +++ b/conf/modules/snv_annotation.config @@ -24,54 +24,34 @@ process { ] } - withName: '.*:SNV_ANNOTATION:BCFTOOLS_FILLTAGS' { - ext.prefix = { "${meta.id}_ac" } - } - - withName: '.*:SNV_ANNOTATION:BCFTOOLS_NORM' { - ext.args = "-m - --output-type b -w 10000" - } - - withName: '.*:SNV_ANNOTATION:BCFTOOLS_NORM_SINGLESAMPLE' { - ext.args = "-m - --output-type b -w 10000" - } - withName: '.*:SNV_ANNOTATION:ECHTVAR_ANNO' { - ext.prefix = { "${meta.id}_echtvar_anno" } - } - withName: '.*:SNV_ANNOTATION:ECHTVAR_ENCODE' { - - publishDir = [ - path: { "${params.outdir}/snv_annotation/echtvar/encode/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*:SNV_ANNOTATION:ENSEMBLVEP_VEP' { - - ext.prefix = { "${meta.id}_vep" } - + withName: 'BCFTOOLS_FILLTAGS_ANNO' { + ext.prefix = { "${meta.id}_filltags_anno" } ext.args = [ - '--vcf', - '--offline' + '--output-type z', + '--write-index=tbi' ].join(' ') - - publishDir = [ - path: { "${params.outdir}/snv_annotation/ensemblvep/vep/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] } - withName: '.*:SNV_ANNOTATION:TABIX_VEP' { - - publishDir = [ - path: { "${params.outdir}/snv_annotation/ensemblvep/vep/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] + withName: '.*:SNV_ANNOTATION:ENSEMBLVEP_VEP' { + ext.prefix = { "${meta.id}_vep" } + ext.args = { [ + "--dir_plugins .", + "--plugin LoFtool,LoFtool_scores.txt", + "--plugin pLI,pLI_values.txt", + '--distance 5000', + '--buffer_size 20000', + '--format vcf --max_sv_size 248387328', + '--appris --biotype --cache --canonical --ccds --compress_output bgzip', + '--domains --exclude_predicted --force_overwrite', + '--hgvs --humdiv --no_progress --numbers', + '--polyphen p --protein --offline --regulatory --sift p --symbol --tsl', + '--uniprot --vcf', + '--no_stats' + ].join(' ') } } + } diff --git a/conf/modules/structural_variant_calling.config b/conf/modules/structural_variant_calling.config index b5b73eed..8e1e5a28 100644 --- a/conf/modules/structural_variant_calling.config +++ b/conf/modules/structural_variant_calling.config @@ -33,10 +33,10 @@ process { withName: '.*:STRUCTURAL_VARIANT_CALLING:SNIFFLES_MULTISAMPLE' { - ext.prefix = 'multisample_sniffles' + ext.prefix = { "${meta.id}_sniffles" } publishDir = [ - path: { "${params.outdir}/sv_calling/sniffles/multi_sample" }, + path: { "${params.outdir}/sv_calling/sniffles/multi_sample/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/conf/test.config b/conf/test.config index 87d4b8e6..8cc9b923 100644 --- a/conf/test.config +++ b/conf/test.config @@ -9,7 +9,6 @@ ---------------------------------------------------------------------------------------- */ - params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' @@ -24,7 +23,7 @@ params { // Genome references fasta = params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz' - input = params.pipelines_testdata_base_path + 'nallo/testdata/samplesheet.csv' + input = 'https://github.com/genomic-medicine-sweden/test-datasets/raw/2948776ddf24ea131f527aa1f2dc23a43bb7b952/testdata/samplesheet.csv' bed = params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed' @@ -34,37 +33,72 @@ params { hificnv_exclude = params.pipelines_testdata_base_path + 'nallo/reference/empty.bed' // Dipcall - dipcall_par = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed' + par_regions = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed' - // TRGT + // Repeats trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed' + variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json' // SNV Annotation - vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz' - snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv' + vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz' + vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv' + snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv' + + // Rank variants + reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv' + score_config_snv = params.pipelines_testdata_base_path + 
'nallo/reference/rank_model_snv.ini' + variant_consequences_snv = params.pipelines_testdata_base_path + 'nallo/reference/variant_consequences_v2.txt' // Somalier somalier_sites = params.pipelines_testdata_base_path + 'nallo/reference/somalier_sites.vcf.gz' - parallel_snv = 3 // Create 3 parallel DeepVariant processes + parallel_snv = 2 // Create 2 parallel DeepVariant processes preset = "revio" } process { withName: '.*:NALLO:ASSEMBLY:HIFIASM' { - - ext.args = '-f0' + ext.args = '-f0 -k30 -w30 -D10 -r1 -N1' } withName: '.*:CALL_PARALOGS:PARAPHASE' { - ext.args = '--gene hba' } - withName: '.*:NALLO:PHASING:WHATSHAP_PHASE' { + withName: '.*:SNV_ANNOTATION:ENSEMBLVEP_VEP' { + ext.prefix = { "${meta.id}_vep" } + ext.args = { [ + "--dir_plugins .", + "--plugin LoFtool,LoFtool_scores.txt", + "--plugin pLI,pLI_values.txt", + "--plugin SpliceAI,snv=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz", + '--distance 5000', + '--buffer_size 20000', + '--format vcf --max_sv_size 248387328', + '--appris --biotype --cache --canonical --ccds --compress_output bgzip', + '--domains --exclude_predicted --force_overwrite', + '--hgvs --humdiv --no_progress --numbers', + '--polyphen p --protein --offline --regulatory --sift p --symbol --tsl', + '--uniprot --vcf', + '--no_stats' + ].join(' ') } + } + withName: '.*:NALLO:PHASING:WHATSHAP_PHASE' { ext.args = '--ignore-read-groups --indels --distrust-genotypes --include-homozygous' } + withName: 'FASTQC|MINIMAP2_INDEX_DIPCALL|PARAPHASE|SNIFFLES|GFASTATS_PATERNAL|GFASTATS_MATERNAL|HIFICNV' { + cpus = 1 + } + + withName: 'MINIMAP2_INDEX|HIFIASM' { + cpus = 2 + } + + withName: 'SAMTOOLS_FASTQ|FASTP|MINIMAP2_ALIGN|GLNEXUS|ENSEMBLVEP_VEP|MODKIT_PILEUP|MODKIT_PILEUP_HAPLOTYPES' { + cpus = 4 + } + } diff --git a/conf/test_full.config b/conf/test_full.config index ae884806..b98b7f74 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,8 +15,6 @@ params { config_profile_description = 'Full test 
dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' // Genome references diff --git a/docs/README.md b/docs/README.md index ca01fef3..2208a308 100644 --- a/docs/README.md +++ b/docs/README.md @@ -3,6 +3,8 @@ The genomic-medicine-sweden/nallo documentation is split into the following pages: - [Usage](usage.md) - - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. + - An overview of how the pipeline works, how to run it, and a description of command-line flags and pipeline parameters needed to run the pipeline. +- [Parameters](parameters.md) + - A description of all available pipeline parameters, including those not described in [Usage](usage.md). - [Output](output.md) - An overview of the different results produced by the pipeline and how to interpret them. diff --git a/docs/output.md b/docs/output.md index 3ef826bc..ae221835 100644 --- a/docs/output.md +++ b/docs/output.md @@ -21,8 +21,10 @@ - [Raw read QC](#raw-read-qc) - [FastQC](#fastqc) - [Repeat calling](#repeat-calling) + - [Repeat annotation](#repeat-annotation) - [SNV Annotation](#snv-annotation) - - [SNV Calling](#snv-calling) + - [Ranked Variants](#ranked-variants) + - [SV Calling](#sv-calling) ## Pipeline overview @@ -158,24 +160,27 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files from WhatsHap -- `{outputdir}/phasing/whatshap/haplotag/{sample}/` - - `*.bam`: BAM file with haplotags - - `*.bai`: Index of the corresponding bam file +- `{outputdir}/aligned_reads/{sample}/` + - `{sample}_phased.bam`: BAM file with haplotags + - `{sample}_phased.bam.bai`: Index of the corresponding bam file - `{outputdir}/phasing/whatshap/phase/{sample}/` - `*.vcf.gz`: VCF file with phased variants - `*.vcf.gz.tbi`: Index of the corresponding VCF file - `{outputdir}/phasing/whatshap/stats/{sample}/` - `*.blocks.tsv`: File with phase blocks - - `*.stats.tsv.gz`: File with phasing statistics + - `*.stats.tsv`: File with phasing statistics
Output files from HiPhase +- `{outputdir}/aligned_reads/{sample}/` + + - `{sample}_phased.bam`: BAM file with haplotags + - `{sample}_phased.bam.bai`: Index of the corresponding bam file + - `{outputdir}/phasing/hiphase/{snv,sv}/{sample}/` - - `*.bam`: BAM file with haplotags - - `*.bai`: Index of the corresponding bam file - `*.blocks.tsv`: File with phase blocks - `*.stats.tsv.gz`: File with phasing statistics - `*.vcf.gz`: VCF file with phased variants @@ -240,10 +245,10 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files from Somalier -- `{outputdir}/qc_aligned_reads/somalier/relate/mutlisample/` - - `*.html`: HTML report - - `*.pairs.tsv`: Output information in sample pairs - - `*.samples.tsv`: Output information per sample +- `{outputdir}/qc_aligned_reads/somalier/relate/{project}/` + - `{project}.html`: HTML report + - `{project}.pairs.tsv`: Output information in sample pairs + - `{project}.samples.tsv`: Output information per sample
### Raw read QC @@ -269,7 +274,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files from TRGT -- `{outputdir}/repeat_calling/trgt/multi_sample/multisample/` +- `{outputdir}/repeat_calling/trgt/multi_sample/{project}/` - `*.vcf.gz`: Merged VCF for all samples - `*.vcf.gz.tbi`: Index of the corresponding VCF file - `{outputdir}/repeat_calling/trgt/single_sample/{sample}/` @@ -279,39 +284,68 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ - `*.bai`: Index of the corresponding bam file
+### Repeat annotation + +[Stranger](https://github.com/Clinical-Genomics/stranger) is used to annotate repeats. + +
+Output files from Stranger + +- `{outputdir}/repeat_annotation/stranger/{sample}` + - `*.vcf.gz`: Annotated VCF + - `*.vcf.gz.tbi`: Index of the corresponding VCF file +
+ ### SNV Annotation -[echtvar](https://github.com/brentp/echtvar) and [VEP](https://www.ensembl.org/vep) is used to annotate small variants. +[echtvar](https://github.com/brentp/echtvar) and [VEP](https://www.ensembl.org/vep) are used to annotate small variants. +[bcftools](https://samtools.github.io/bcftools/) is used to generate statistics.
Output files from SNV Annotation -- `{outputdir}/snv_annotation/echtvar/encode/multisample/` +- `{outputdir}/databases/echtvar/encode/{project}/` - `*.zip`: Database with AF and AC for all samples run -- `{outputdir}/snv_annotation/ensemblvep/vep/{sample}/` - - `*.html`: Summary HTML report - - `*.vcf.gz`: VCF with annotated variants - - `*.vcf.gz.tbi`: Index of the corresponding VCF file +- `{outputdir}/snvs/{single_sample,multi_sample/{project}/` + - `*_snvs_annotated*.vcf.gz`: VCF with annotated variants + - `*_snvs_annotated*.vcf.gz.tbi`: Index of the corresponding VCF file +- `{outputdir}/snvs/stats/single_sample/` + - `*.stats.txt`: Variant statistics
-### SNV Calling +### Ranked variants + +#### Filter_vep + +[filter_vep from VEP](https://www.ensembl.org/info/docs/tools/vep/script/vep_filter.html) is used to subset the variants based on a list of HGNC ID:s. Typical use case is that you want to filter your results to only include variants in a predefined set of clinically relevant genes. This step is optional and can be disabled by using the flag `--skip_vep_filter`. You will always get the complete VCF together with the clinical VCF. + +#### GENMOD + +[GENMOD](https://github.com/Clinical-Genomics/genmod) is a simple to use command line tool for annotating and analyzing genomic variations in the VCF file format. GENMOD can annotate genetic patterns of inheritance in vcf files with single or multiple families of arbitrary size. Each variant will be assigned a predicted pathogenicity score. The score will be given both as a raw score and a normalized score with values between 0 and 1. The tags in the INFO field are `RankScore` and `RankScoreNormalized`. The score can be configured to fit your annotations and preferences by modifying the score config file. + +
+Output files + +- `{outputdir}/snvs/single_sample/{sample}/` + - `{sample}_snv_annotated_ranked.vcf.gz`: VCF with annotated and ranked variants + - `{sample}_snv_annotated_ranked.vcf.gz.tbi`: Index of the corresponding VCF file +- `{outputdir}/snvs/multi_sample/{project}/` + - `{project}_snv_annotated_ranked.vcf.gz`: VCF with annotated and ranked variants + - `{project}_snv_annotated_ranked.vcf.gz.tbi`: Index of the corresponding VCF file +
+ +### SV Calling [Sniffles](https://github.com/fritzsedlazeck/Sniffles) is used to call and merge structural variants.
Output files from SNV Calling -- `{outputdir}/sv_calling/multi_sample/` +- `{outputdir}/sv_calling/multi_sample/{project}` - `*.vcf.gz`: VCF with variants - `*.vcf.gz.tbi`: Index of the corresponding VCF file - `{outputdir}/sv_calling/single_sample/{sample}` - `*.snf`: Sniffles SNF file - `*.vcf.gz`: VCF with variants - `*.vcf.gz.tbi`: Index of the corresponding VCF file -- `{outputdir}/snv_calling/single_sample/deepvariant/gvcf/{sample}/` - - `*.g.vcf.gz`: gVCF with variants - - `*.g.vcf.gz.tbi`: Index of the corresponding gVCF file -- `{outputdir}/snv_calling/single_sample/deepvariant/vcf/{sample}/` - - `*.vcf.gz`: VCF with variants - - `*.vcf.gz.tbi`: Index of the corresponding VCF file
diff --git a/docs/parameters.md b/docs/parameters.md new file mode 100644 index 00000000..890af543 --- /dev/null +++ b/docs/parameters.md @@ -0,0 +1,177 @@ + + +# genomic-medicine-sweden/nallo pipeline parameters + +Long-read variant calling pipeline + +## Workflow skip options + +Allows skipping certain parts of the pipeline + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `skip_aligned_read_qc` | Skip QC of aligned reads | `boolean` | False | | | +| `skip_raw_read_qc` | Skip QC of unaligned (raw) reads | `boolean` | False | | | +| `skip_short_variant_calling` | Skip short variant calling | `boolean` | False | | | +| `skip_assembly_wf` | Skip genome assembly and assembly variant calling | `boolean` | False | | | +| `skip_mapping_wf` | Skip read mapping (alignment) | `boolean` | False | | | +| `skip_methylation_wf` | Skip generation of methylation pileups | `boolean` | False | | | +| `skip_repeat_calling` | Skip tandem repeat calling | `boolean` | False | | | +| `skip_repeat_annotation` | Skip tandem repeat annotation | `boolean` | False | | | +| `skip_phasing_wf` | Skip phasing of variants and haplotagging of reads | `boolean` | False | | | +| `skip_snv_annotation` | Skip short variant annotation | `boolean` | False | | | +| `skip_cnv_calling` | Skip CNV calling | `boolean` | False | | | +| `skip_call_paralogs` | Skip the calling of specific paralogous genes | `boolean` | False | | | +| `skip_rank_variants` | Skip ranking of short variants | `boolean` | False | | | + +## Input/output options + +Define where the pipeline should find input data and save output data. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `input` | Path to comma-separated file containing information about the samples in the experiment.
HelpYou will +need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its +location. It has to be a comma-separated file with 3 columns, and a header row.
| `string` | | True | | +| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` | +| True | | +| `email` | Email address for completion summary.
HelpSet this parameter to your e-mail address to get a summary +e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to +specify this on the command line for every run.
| `string` | | | | +| `multiqc_title` | MultiQC report title. Printed as page header, used for filename if not otherwise specified. | `string` | | | | + +## Reference genome options + +Reference genome related files and options required for the workflow. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `fasta` | Reference genome | `string` | | | | +| `genome` | Name of iGenomes reference.
HelpIf using a reference genome configured in the pipeline using +iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files +e.g. `--genome GRCh38`.

See the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.
| +`string` | | | | +| `igenomes_ignore` | Do not load the iGenomes reference config.
HelpDo not load `igenomes.config` when running +the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in +`igenomes.config`.
| `boolean` | True | | True | + +## Institutional config options + +Parameters used to describe centralised config profiles. These should not be edited. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True | +| `custom_config_base` | Base directory for Institutional configs.
HelpIf you're running offline, Nextflow will +not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you +should download the files from the repo and tell Nextflow where to find them with this parameter.
| `string` | +https://raw.githubusercontent.com/nf-core/configs/master | | True | +| `config_profile_name` | Institutional config name. | `string` | | | True | +| `config_profile_description` | Institutional config description. | `string` | | | True | +| `config_profile_contact` | Institutional config contact information. | `string` | | | True | +| `config_profile_url` | Institutional config URL link. | `string` | | | True | + +## Max job request options + +Set the top limit for requested resources for any single job. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `max_cpus` | Maximum number of CPUs that can be requested for any single job.
HelpUse to set an upper-limit for +the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`
| `integer` | 16 | | True | +| `max_memory` | Maximum amount of memory that can be requested for any single job.
HelpUse to set an upper-limit +for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`
| `string` | +128.GB | | True | +| `max_time` | Maximum amount of time that can be requested for any single job.
HelpUse to set an upper-limit for +the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`
| `string` | 240.h | +| True | + +## Generic options + +Less common options for the pipeline, typically set in a config file. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `help` | Display help text. | `boolean` | | | True | +| `version` | Display version and exit. | `boolean` | | | True | +| `publish_dir_mode` | Method used to save pipeline results to output directory.
HelpThe Nextflow `publishDir` +option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move +these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.
| `string` | copy | | +True | +| `email_on_fail` | Email address for completion summary, only when pipeline fails.
HelpAn email address to send +a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.
| `string` | | | True | +| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True | +| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True | +| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True | +| `hook_url` | Incoming hook URL for messaging service
HelpIncoming hook URL for messaging service. Currently, MS +Teams and Slack are supported.
| `string` | | | True | +| `multiqc_config` | Custom config file to supply to MultiQC. | `string` | | | True | +| `multiqc_logo` | Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file | `string` | | | True | +| `multiqc_methods_description` | Custom MultiQC yaml file containing HTML including a methods description. | `string` | | | | +| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True | +| `validationShowHiddenParams` | Show all params when using `--help`
HelpBy default, parameters set as _hidden_ +in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all +parameters.
| `boolean` | | | True | +| `validationSkipDuplicateCheck` | nf-validation related parameter | `boolean` | | | True | +| `validationS3PathCheck` | Boolean whether to validate AWS S3 paths | `boolean` | | | True | +| `monochromeLogs` | Boolean whether to color nf-validation logs | `boolean` | | | True | + +## Workflow options + +Workflow options specific to genomic-medicine-sweden/nallo + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `preset` | Enable or disable certain parts of the pipeline by default, depending on data type (`revio`, `pacbio`, `ONT_R10`) | `string` | revio | +True | | +| `variant_caller` | Which short variant software to use (`deepvariant`) | `string` | deepvariant | | | +| `phaser` | Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`) | `string` | whatshap | | | +| `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode (`hifi-only`, `trio-binning`) | `string` | hifi-only | | | +| `parallel_alignments` | If parallel_alignments is bigger than 1, input files will be split and aligned in parallel to reduce processing time. | +`integer` | 1 | | | +| `parallel_snv` | If parallel_snv is bigger than 1, short variant calling will be done in parallel to reduce processing time. | `integer` | 13 | | +| +| `vep_cache_version` | VEP cache version | `integer` | 110 | | | +| `vep_plugin_files` | A csv file with paths to vep plugin files. pLI and LoFtool are required. | `string` | | | | +| `deepvariant_model_type` | Sets the model type used for DeepVariant. This is set automatically using `--preset` by default. | `string` | PACBIO | +| True | + +## File inputs + +The different files that are required. Some are only required by certain workflows, see the usage documentation. 
+ +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `cadd_prescored` | Path to a directory containing prescored indels for CADD.
HelpThis folder contains the +compressed files and indexes that would otherwise be in data/prescored folder as described in +https://github.com/kircherlab/CADD-scripts/#manual-installation.
| `string` | | | | +| `cadd_resources` | Path to a directory containing CADD annotations.
HelpThis folder contains the uncompressed +files that would otherwise be in data/annotation folder as described in +https://github.com/kircherlab/CADD-scripts/#manual-installation.
| `string` | | | | +| `par_regions` | Provide a bed file of chrX and chrY PAR regions for dipcall and DeepVariant | `string` | | | | +| `tandem_repeats` | A tandem repeat BED file for sniffles | `string` | | | | +| `trgt_repeats` | A BED file with repeats to be genotyped with TRGT | `string` | | | | +| `snp_db` | A csv file with echtvar databases to annotate SNVs with | `string` | | | | +| `variant_catalog` | A variant catalog json-file for stranger | `string` | | | | +| `variant_consequences_snv` | File containing list of SO terms listed in the order of severity from most severe to least severe for annotating +genomic SNVs.
HelpFor more information check +https://ensembl.org/info/genome/variation/prediction/predicted_data.html
| `string` | | | | +| `vep_cache` | A path to the VEP cache location | `string` | | | | +| `bed` | A BED file with regions of interest, used to limit short variant calling. | `string` | | | | +| `hificnv_xy` | A BED file containing expected copy number regions for XY samples. | `string` | | | | +| `hificnv_xx` | A BED file containing expected copy number regions for XX samples. | `string` | | | | +| `hificnv_exclude` | A BED file specifying regions to exclude with HiFiCNV, such as centromeres. | `string` | | | | +| `reduced_penetrance` | A file with gene ids that have reduced penetrance. For use with genmod. | `string` | | | | +| `score_config_snv` | A SNV rank model config file for genmod. | `string` | | | | +| `somalier_sites` | A VCF of known polymorphic sites for somalier | `string` | | | | +| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found. +
HelpBy default, when an unrecognised parameter is found, it returns a warning.
| `boolean` | | +| True | +| `validationLenientMode` | Validation of parameters in lenient mode.
HelpAllows string values that are parseable +as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).
| +`boolean` | | | True | +| `pipelines_testdata_base_path` | Base URL or local path to location of pipeline test dataset files | `string` | +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/ | | True | + + + diff --git a/docs/usage.md b/docs/usage.md index 7cb614f5..c4ed524c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,11 +10,11 @@ genomic-medicine-sweden/nallo is a bioinformatics analysis pipeline to analyse l 2. Install one of the following technologies for full pipeline reproducibility: Docker, Singularity, Podman, Shifter or Charliecloud. > Almost all nf-core pipelines give you the option to use conda as well. However, some tools used in the nallo pipeline do not have a conda package so we do not support conda at the moment. -## Run genomic-medicine-sweden/nallo with test data +## Getting started -Before running the pipeline with your data, we recommend running it with the test dataset available in the `assets/test_data` folder provided with the pipeline. You do not need to download any of the data as part of it came directly with the pipeline and the other part will be fetched automatically for you when you use the test profile. +Before running the pipeline with your data, we recommend running it with the test profile. You do not need to download any of the data as it will be fetched automatically for you when you use the test profile. -Run the following command, where YOURPROFILE is the package manager you installed on your machine. For example, `-profile test,docker` or `-profile test,singularity`: +Run the following command, where YOURPROFILE is the package manager you installed on your machine. 
For example, `-profile test,docker` or `-profile test,singularity`: ``` nextflow run genomic-medicine-sweden/nallo \ @@ -25,7 +25,7 @@ nextflow run genomic-medicine-sweden/nallo \ > Check [nf-core/configs](https://github.com/nf-core/configs/tree/master/conf) to see if a custom config file to run nf-core pipelines already exists for your institute. If so, you can simply use `-profile test,` in your command. This enables the appropriate package manager and sets the appropriate execution settings for your machine. > NB: The order of profiles is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -Running the command creates the following files in your working directory: +Running the command creates the following files in your working directory: ``` work # Directory containing the Nextflow working files @@ -41,250 +41,230 @@ work # Directory containing the Nextflow working files The above command downloads the pipeline from GitHub, caches it, and tests it on the test dataset. When you run the command again, it will fetch the pipeline from cache even if a more recent version of the pipeline is available. To make sure that you're running the latest version of the pipeline, update the cached version of the pipeline by including `-latest` in the command. -## Run genomic-medicine-sweden/nallo with your data +## Running genomic-medicine-sweden/nallo with your data -Running the pipeline involves three steps: +Running the pipeline on real data involves three steps: -1. Prepare a samplesheet -2. Gather all required references -3. Supply samplesheet and references, and run the command +1. Prepare a samplesheet with your data +2. Gather required files and references +3. Supply samplesheet, references and files, and run the pipeline -## Samplesheet input +## Samplesheet -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location.
+First, you will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. ```bash --input '[path to samplesheet file]' ``` -It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. -`file` can either be a gzipped-fastq file or an aligned or unalinged BAM file (BAM files will be converted to FASTQ and aligned again). -`phenotype` is not used at the moment but still required, set it to `1`. If you don't have related samples, `family_id` could be set to sample name, and `paternal_id` and `maternal_id` to a value that is not another `sample` name. +It has to be a comma-separated file with 8 columns, and a header row as shown in the example below: ```console -sample,file,family_id,paternal_id,maternal_id,sex,phenotype -HG002,/path/to/HG002.fastq.gz,FAM,HG003,HG004,1,1 -HG005,/path/to/HG005.bam,FAM,HG003,HG004,2,1 +project,sample,file,family_id,paternal_id,maternal_id,sex,phenotype +testrun,HG002,/path/to/HG002.fastq.gz,FAM,HG003,0,1,2 +testrun,HG003,/path/to/HG003.bam,FAM,0,0,2,1 ``` -| Fields | Description | -| ------------- | ------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name, cannot contain spaces. | -| `file` | Absolute path to gzipped FASTQ or BAM file. File has to have the extension ".fastq.gz", .fq.gz" or ".bam". | -| `family_id` | "Family ID must be provided and cannot contain spaces. If no family ID is available you can use the same ID as the sample | -| `paternal_id` | Paternal ID must be provided and cannot contain spaces. If no paternal ID is available, use any ID not in sample column. | -| `maternal_id` | Maternal ID must be provided and cannot contain spaces. If no maternal ID is available, use any ID not in sample column. | -| `sex` | Sex (0=unknown; 1=male; 2=female).
| -| `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). | +| Fields | Description | +| ------------- | --------------------------------------------------------------------------------------------------------------------------------- | +| `project` | Project name must be provided and cannot contain spaces, needs to be the same for all samples. | +| `sample` | Custom sample name, cannot contain spaces. | +| `file` | Absolute path to gzipped FASTQ or BAM file. File has to have the extension ".fastq.gz", ".fq.gz" or ".bam". | +| `family_id` | Family ID must be provided and cannot contain spaces. If no family ID is available use the same ID as sample. | +| `paternal_id` | Paternal ID must be provided and cannot contain spaces. If no paternal ID is available, use 0. | +| `maternal_id` | Maternal ID must be provided and cannot contain spaces. If no maternal ID is available, use 0. | +| `sex` | Sex must be provided as 0, 1 or 2 (0=unknown; 1=male; 2=female). If sex is unknown it will be assigned automatically if possible. | +| `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. -The typical command for running the pipeline is as follows: +## Preset -```bash -nextflow run genomic-medicine-sweden/nallo -profile docker \ - --input samplesheet.csv \ - --preset \ - --outdir \ - --fasta \ - --skip_assembly_wf \ - --skip_repeat_wf \ - --skip_snv_annotation \ - --skip_cnv_calling -``` +This pipeline comes with three different presets that should be set with the `--preset` parameter: -## Reference files and parameters +- `revio` (default) +- `pacbio` +- `ONT_R10` -The typical command example above requires no additional files except the reference genome. -Nallo has the ability to skip certain parts of the pipeline, for example `--skip_repeat_wf`.
-Some workflows require additional files: +`--skip_assembly_wf` and `--skip_repeat_wf` will be set to true for `ONT_R10` and `--skip_methylation_wf` will be set to true for `pacbio`, meaning these subworkflows are not run. -- If running without `--skip_assembly_wf`, download a BED file with PAR regions ([hg38](https://raw.githubusercontent.com/lh3/dipcall/master/data/hs38.PAR.bed)) to supply with `--dipcall_par`. +## Subworkflows -> [!NOTE] -> Make sure chrY PAR is hard masked in reference. +As indicated above, this pipeline is divided into multiple subworkflows, each with its own input requirements and outputs. By default, all subworkflows are active, and thus all mandatory input files are required. -- If running without `--skip_repeat_wf`, download a BED file with tandem repeats ([TRGT](https://github.com/PacificBiosciences/trgt/tree/main/repeats)) matching your reference genome to supply with `--trgt_repeats`. +The only parameters mandatory for all subworkflows are `--input` and `--outdir`; all other parameters are determined by the active subworkflows. If you run `nextflow run genomic-medicine-sweden/nallo -profile docker --outdir results --input samplesheet.csv`, the pipeline will tell you: + +``` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + --skip_assembly_wf is NOT active, the following files are required: --dipcall_par + --skip_snv_annotation is NOT active, the following files are required: --snp_db + --skip_mapping_wf is NOT active, the following files are required: --somalier_sites + --skip_snv_annotation is NOT active, the following files are required: --vep_cache + ...
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``` -- If running without `--skip_snv_annotation`, download [VEP cache](https://ftp.ensembl.org/pub/release-110/variation/vep/homo_sapiens_vep_110_GRCh38.tar.gz) to supply with `--vep_cache` and prepare a samplesheet with annotation databases ([`echtvar encode`](https://github.com/brentp/echtvar)) to supply with `--snp_db`: +The pipeline will try to guide you through which files are required, but a thorough description is provided below. -`snp_dbs.csv` +Additionally, if you want to skip a subworkflow, you will need to explicitly skip all subworkflows that rely on it. For example, `nextflow run genomic-medicine-sweden/nallo -profile docker --outdir results --input samplesheet.csv --skip_mapping_wf` will tell you: ``` -sample,file -gnomad,/path/to/gnomad.v3.1.2.echtvar.popmax.v2.zip -cadd,/path/to/cadd.v1.6.hg38.zip +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + --skip_mapping_wf is active, the pipeline has to be run with: --skip_aligned_read_qc --skip_assembly_wf --skip_call_paralogs --skip_short_variant_calling --skip_snv_annotation --skip_cnv_calling --skip_phasing_wf --skip_rank_variants --skip_repeat_calling --skip_repeat_annotation --skip_methylation_wf + ... +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``` -- If running without `--skip_cnv_calling`, expected CN regions for your reference genome can be downloaded from [HiFiCNV GitHub](https://github.com/PacificBiosciences/HiFiCNV/tree/main/data) to supply with `--hificnv_xy`, `--hificnv_xx` (expected_cn) and `--hificnv_exclude` (excluded_regions). +This is because almost all other subworkflows rely on the mapping subworkflow.
-- If you want to include extra samples for mili-sample calling of SVs - prepare a samplesheet with .snf files from Sniffles to supply with `--extra_snfs`: +If you want to run the pipeline without any other input files than `--input samplesheet.csv`, all of the above skips will need to be active, and the pipeline will run only unaligned read QC. -`extra_snfs.csv` +## Reference files and parameters -``` -sample,file -HG01123,/path/to/HG01123_sniffles.snf -HG01124,/path/to/HG01124_sniffles.snf -``` +As described above, the files required depend on the active subworkflows. All parameters are listed [here](parameters.md), but the most useful parameters needed to run the pipeline are described in more detail below. -- For SNVs - prepare a samplesheet with gVCF files from DeepVariant to supply with `--extra_gvcfs`: +### Raw read QC (`--skip_raw_read_qc`) + +This subworkflow requires no additional files. + +### Mapping (`--skip_mapping_wf`) + +The majority of subworkflows depend on the mapping (alignment) subworkflow which requires `--fasta` and `--somalier_sites`. + +| Parameter | Description | +| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `fasta` | Reference genome, either gzipped or uncompressed FASTA (e.g. [GRCh38_no_alt_analysis_set.fna.gz](ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz)) | +| `somalier_sites` | A VCF of known polymorphic sites (e.g. [sites.hg38.vcf.gz](https://github.com/brentp/somalier/files/3412456/sites.hg38.vcf.gz)), from which sex will be inferred if possible.
| + +### Aligned read QC (`--skip_aligned_read_qc`) + +This subworkflow depends on the mapping subworkflow, but requires no additional files. + +### Assembly (`--skip_assembly_wf`) + +This subworkflow contains both genome assembly and assembly variant calling. The assembly variant calling needs the sex of samples and for samples with unknown sex this is inferred from aligned reads, therefore it depends on the mapping subworkflow. + +It requires a BED file with PAR regions. + +| Parameter | Description | +| ------------- | --------------------------------------------------------------------------------------------------------------------------------- | +| `par_regions` | A BED file with PAR regions (e.g. [GRCh38_PAR.bed](https://storage.googleapis.com/deepvariant/case-study-testdata/GRCh38_PAR.bed)) | > [!NOTE] -> These has to have been generated with the same version of reference genome. +> Make sure chrY PAR is hard masked in the reference genome you are using. -`extra_gvcfs.csv` +### Call paralogs (`--skip_call_paralogs`) -``` -sample,file -HG01123,/path/to/HG01123.g.vcf.gz -HG01124,/path/to/HG01124.g.vcf.gz -HG01125,/path/to/HG01125.g.vcf.gz -``` +This subworkflow depends on the mapping subworkflow, but requires no additional files. + +> [!NOTE] +> Only GRCh38 is supported. -- If running without `--skip_call_paralogs`, the reference genome needs to be hg38 +### Short variant calling (`--skip_short_variant_calling`) -- If running without `--skip_mapping_wf`, a VCF of known polymorphic sites (e.g. [sites.hg38.vcg.gz](https://github.com/brentp/somalier/files/3412456/sites.hg38.vcf.gz)) needs to be supplied with `--somalier_sites`, from which sex will be inferred if possible. +This subworkflow depends on the mapping subworkflow, and requires the same PAR regions file as the assembly workflow.
-#### Highlighted parameters: +| Parameter | Description | +| ------------- | --------------------------------------------------------------------------------------------------------------------------------- | +| `par_regions` | A BED file with PAR regions (e.g. [GRCh38_PAR.bed](ttps://storage.googleapis.com/deepvariant/case-study-testdata/GRCh38_PAR.bed)) | -- You can choose to limit SNV calling to regions in BED file (`--bed`). +### CNV calling (`--skip_cnv_calling`) -- By default SNV-calling is split into 13 parallel processes, limit this by setting `--parallel_snv` to a different number. +This subworkflow depends on the mapping and short variant calling subworkflows, and requires the following additional files: -- By default the pipeline does not perform parallel alignment, but this can be set by setting `--split_fastq` to split the input and alignment into N files/processes. +| Parameter | Description | +| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `hificnv_xy` | expected XY copy number regions for your reference genome (e.g. [expected_cn.hg38.XY.bed](https://github.com/PacificBiosciences/HiFiCNV/raw/main/data/expected_cn/expected_cn.hg38.XY.bed)) | +| `hificnv_xx` | expected XX copy number regions for your reference genome (e.g. [expected_cn.hg38.XX.bed](https://github.com/PacificBiosciences/HiFiCNV/raw/main/data/expected_cn/expected_cn.hg38.XX.bed)) | +| `hificnv_exclude` | BED file specifying regions to exclude (e.g. 
[cnv.excluded_regions.hg38.bed.gz](https://github.com/PacificBiosciences/HiFiCNV/raw/main/data/excluded_regions/cnv.excluded_regions.hg38.bed.gz)) | -All parameters are listed below: +### Phasing (`--skip_phasing_wf`) -## Workflow skip options +This subworkflow phases variants and haplotags aligned BAM files, and such relies on the mapping and short variant calling subworkflows, but requires no additional files. -Options to skip various steps within the workflow +### Methylation (`--skip_methylation_wf`) -| Parameter | Description | Type | Default | Required | Hidden | -| ---------------------------- | ------------------------------------------ | --------- | ------- | -------- | ------ | -| `skip_raw_read_qc` | Skip raw read QC | `boolean` | `False` | | | -| `skip_short_variant_calling` | Skip short variant calling | `boolean` | `False` | | | -| `skip_assembly_wf` | Skip assembly and downstream processes | `boolean` | `False` | | | -| `skip_mapping_wf` | Skip read mapping and downstream processes | `boolean` | `False` | | | -| `skip_methylation_wf` | Skip methylation workflow | `boolean` | `False` | | | -| `skip_repeat_wf` | Skip repeat analysis workflow | `boolean` | `False` | | | -| `skip_phasing_wf` | Skip phasing workflow | `boolean` | `False` | | | -| `skip_snv_annotation` | Skip SNV annotation | `boolean` | `False` | | | -| `skip_cnv_calling` | Skip CNV workflow | `boolean` | `False` | | | -| `skip_call_paralogs` | Skip call paralogs (Paraphase) | `boolean` | `False` | | | - -## Input/output options - -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | Type | Default | Required | Hidden | -| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | -------- | ------ | -| `input` | Path to comma-separated file containing information about the samples in the experiment.
HelpYou will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.
| `string` | | True | | -| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` | | True | | -| `email` | Email address for completion summary.
HelpSet this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.
| `string` | | | | -| `multiqc_title` | MultiQC report title. Printed as page header, used for filename if not otherwise specified. | `string` | | | | - -## Reference genome options - -Reference genome related files and options required for the workflow. - -| Parameter | Description | Type | Default | Required | Hidden | -| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `fasta` | Reference genome | `string` | | | | -| `genome` | Name of iGenomes reference.
HelpIf using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.

See the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.
| `string` | | | | -| `igenomes_ignore` | Do not load the iGenomes reference config.
HelpDo not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.
| `boolean` | True | | True | - -## Institutional config options - -Parameters used to describe centralised config profiles. These should not be edited. - -| Parameter | Description | Type | Default | Required | Hidden | -| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -------------------------------------------------------- | -------- | ------ | -| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True | -| `custom_config_base` | Base directory for Institutional configs.
HelpIf you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.
| `string` | https://raw.githubusercontent.com/nf-core/configs/master | | True | -| `config_profile_name` | Institutional config name. | `string` | | | True | -| `config_profile_description` | Institutional config description. | `string` | | | True | -| `config_profile_contact` | Institutional config contact information. | `string` | | | True | -| `config_profile_url` | Institutional config URL link. | `string` | | | True | - -## Max job request options - -Set the top limit for requested resources for any single job. - -| Parameter | Description | Type | Default | Required | Hidden | -| ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `max_cpus` | Maximum number of CPUs that can be requested for any single job.
HelpUse to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`
| `integer` | 16 | | True | -| `max_memory` | Maximum amount of memory that can be requested for any single job.
HelpUse to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`
| `string` | 128.GB | | True | -| `max_time` | Maximum amount of time that can be requested for any single job.
HelpUse to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`
| `string` | 240.h | | True | - -## Generic options - -Less common options for the pipeline, typically set in a config file. - -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------- | ------- | -------- | ------ | -| `help` | Display help text. | `boolean` | | | True | -| `version` | Display version and exit. | `boolean` | | | True | -| `publish_dir_mode` | Method used to save pipeline results to output directory.
HelpThe Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.
| `string` | copy | | True | -| `email_on_fail` | Email address for completion summary, only when pipeline fails.
HelpAn email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.
| `string` | | | True | -| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True | -| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True | -| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True | -| `hook_url` | Incoming hook URL for messaging service
HelpIncoming hook URL for messaging service. Currently, MS Teams and Slack are supported.
| `string` | | | True | -| `multiqc_config` | Custom config file to supply to MultiQC. | `string` | | | True | -| `multiqc_logo` | Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file | `string` | | | True | -| `multiqc_methods_description` | Custom MultiQC yaml file containing HTML including a methods description. | `string` | | | | -| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True | -| `validationShowHiddenParams` | Show all params when using `--help`
HelpBy default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters.
| `boolean` | | | True | -| `validationSkipDuplicateCheck` | nf-validation related parameter | `boolean` | | | True | -| `validationS3PathCheck` | Boolean whether to validate validate AWS S3 paths | `boolean` | | | True | -| `monochromeLogs` | Boolean whether to color nf-validation logs | `boolean` | | | True | - -## Workflow options - -| Parameter | Description | Type | Default | Required | Hidden | -| ---------------- | ------------------------------------------ | --------- | ----------- | -------- | ------ | -| `preset` | Choose a preset depending on data type | `string` | revio | True | | -| `variant_caller` | Choose variant caller | `string` | deepvariant | | | -| `phaser` | Choose phasing software | `string` | whatshap | | | -| `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode | `string` | hifi-only | | | -| `split_fastq` | Split alignment into n jobs | `integer` | 0 | | | -| `parallel_snv` | Split SNV calling into n chunks | `integer` | 13 | | | - -## Extra file inputs - -Different processes may need extra input files - -| Parameter | Description | Type | Default | Required | Hidden | -| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `dipcall_par` | Provide a bed file of chrX PAR regions for dipcall | `string` | | | | -| `extra_gvcfs` | Extra input files for GLNexus | `string` | | | | -| `extra_snfs` | Extra input files for Sniffles | `string` | | | | -| `tandem_repeats` | Tandem repeat BED-file for sniffles | `string` | | | | -| `trgt_repeats` | BED-file for repeats to be genotyped | `string` | | | | -| `snp_db` | Extra echtvar-databases to annotate SNVs with | `string` | | | | -| `vep_cache` | Path to directory of vep_cache | `string` | | | | -| 
`bed` | BED file with regions of interest | `string` | | | | -| `hificnv_xy` | | `string` | | | | -| `hificnv_xx` | | `string` | | | | -| `hificnv_exclude` | HiFiCNV BED file specifying regions to exclude | `string` | | | | -| `somalier_sites` | A VCF of known polymorphic sites | `string` | | | | -| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found.
HelpBy default, when an unrecognised parameter is found, it returns a warning.
| `boolean` | | | True | -| `validationLenientMode` | Validation of parameters in lenient more.
HelpAllows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).
| `boolean` | | | True | +This subworkflow relies on mapping, short variant calling and phasing subworkflows, but requires no additional files. -### Updating the pipeline +### Repeat calling (`--skip_repeat_calling`) + +This subworkflow requires haplotagged BAM files, and such relies on the mapping, short variant calling and phasing subworkflows, and requires the following additional files: + +| Parameter | Description | +| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `trgt_repeats` | a BED file with tandem repeats matching your reference genome (e.g. [pathogenic_repeats.hg38.bed](<[https://github.com/PacificBiosciences/trgt/tree/main/repeats](https://github.com/PacificBiosciences/trgt/raw/main/repeats/pathogenic_repeats.hg38.bed)>)) | + +### Repeat annotation (`--skip_repeat_annotation`) + +This subworkflow relies on the mapping, short variant calling, phasing and repeat calling subworkflows, and requires the following additional files: + +| Parameter | Description | +| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `variant_catalog` | a variant catalog matching your reference (e.g. 
[variant_catalog_grch38.json](https://github.com/Clinical-Genomics/stranger/raw/main/stranger/resources/variant_catalog_grch38.json)) | + +### SNV annotation (`--skip_snv_annotation`) + +This subworkflow relies on the mapping and short variant calling, and requires the following additional files: + + + +| Parameter | Description | +| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `vep_cache` | VEP cache matching your reference genome, either as a `.tar.gz` archive or path to a directory (e.g. [homo_sapiens_vep_110_GRCh38.tar.gz](https://ftp.ensembl.org/pub/release-110/variation/vep/homo_sapiens_vep_110_GRCh38.tar.gz)) | +| `vep_plugins` 1 | A csv file with VEP plugin files, pLI and LoFtool are required. Example provided below. | +| `snp_db` 2 |  A csv file with annotation databases from ([`echtvar encode`](https://github.com/brentp/echtvar)) (e.g. [gnomad.v3.1.2.echtvar.popmax.v2.zip](https://surfdrive.surf.nl/files/index.php/s/LddbAYQAYPqtYu6/download)) | +| `variant_consequences_snv` | A list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). 
You can learn more about these terms [here](https://ensembl.org/info/genome/variation/prediction/predicted_data.html) | + +1 Example file for input with `--vep_plugins` + +- If running without `--skip_snv_annotation`, `--variant_consequences_snv` is also required (File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html)). -```bash -nextflow pull genomic-medicine-sweden/nallo ``` +vep_files +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/LoFtool.pm +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/dbNSFP.pm +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/plugin_config.txt +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz.tbi +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz.tbi +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/pLI_values.txt +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/pLI.pm +https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/MaxEntScan.pm 
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/LoFtool_scores.txt +``` + +2 Example file for input with `--snp_db`: + +``` +sample,file +gnomad,/path/to/gnomad.v3.1.2.echtvar.popmax.v2.zip +cadd,/path/to/cadd.v1.6.hg38.zip +``` + +> [!NOTE] +> Optionally, to calculate CADD scores for small indels, supply a path to a folder containing cadd annotations with `--cadd_resources` and prescored indels with `--cadd_prescored`. Equivalent of the `data/annotations/` and `data/prescored/` folders described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation). CADD scores for SNVs can be annotated through echtvar and `--snp_db`. + +### Rank variants (`--skip_rank_variants`) + +This subworkflow relies on the mapping, short variant calling and SNV annotation subworkflows, and requires the following additional files: + +| Parameter | Description | +| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `score_config_snv` | Used by GENMOD when ranking variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini). | +| `reduced_penetrance` | A list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv). | + +### Other highlighted parameters -When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. 
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: +- Limit SNV calling to regions in BED file (`--bed`). +- By default SNV-calling is split into 13 parallel processes, which speeds up the variant calling significantly. Limit this by setting `--parallel_snv` to a different number. +- By default the pipeline does not perform parallel alignment, but this can be changed by setting `--parallel_alignments` to split the alignment into multiple processes. This comes with some additional overhead, but speeds up the alignment significantly. ### Reproducibility It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [genomic-medicine-sweden/nallo releases page](https://github.com/genomic-medicine-sweden/nallo/releases) and find the latest pipeline version - numeric only (eg. `0.1.0`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 0.1.0`. Of course, you can switch to another version by changing the number after the `-r` flag. +First, go to the [genomic-medicine-sweden/nallo releases page](https://github.com/genomic-medicine-sweden/nallo/releases) and find the latest pipeline version - numeric only (eg. `0.2.0`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 0.2.0`. Of course, you can switch to another version by changing the number after the `-r` flag. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. 
diff --git a/lib/CustomFunctions.groovy b/lib/CustomFunctions.groovy deleted file mode 100644 index 4f9979b4..00000000 --- a/lib/CustomFunctions.groovy +++ /dev/null @@ -1,21 +0,0 @@ -import nextflow.Nextflow - -class CustomFunctions { - - // Function to generate a pedigree file - public static File makePed(samples, outdir) { - def case_name = "multisample" - def outfile = new File(outdir +"/pipeline_info/${case_name}" + '.ped') - outfile.text = ['#family_id', 'sample_id', 'father', 'mother', 'sex', 'phenotype'].join('\t') - def samples_list = [] - for(int i = 0; i versions.yml + "${task.process}": + add_most_severe_consequence: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_csq.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + add_most_severe_consequence: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/add_most_severe_pli.nf b/modules/local/add_most_severe_pli.nf new file mode 100644 index 00000000..f2975a22 --- /dev/null +++ b/modules/local/add_most_severe_pli.nf @@ -0,0 +1,44 @@ +process ADD_MOST_SEVERE_PLI { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.8.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'biocontainers/python:3.8.3' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.vcf") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + add_most_severe_pli.py --file_in ${vcf} --file_out ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + add_most_severe_pli: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_pli.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + add_most_severe_pli: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/bcftools/filltags/main.nf b/modules/local/bcftools/filltags/main.nf index 51c16477..67cbde18 100644 --- a/modules/local/bcftools/filltags/main.nf +++ b/modules/local/bcftools/filltags/main.nf @@ -12,6 +12,8 @@ process BCFTOOLS_FILLTAGS { output: tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.csi") , emit: csi, optional: true + tuple val(meta), path("*.tbi") , emit: tbi, optional: true path "versions.yml" , emit: versions when: diff --git a/modules/local/bcftools/view_regions.nf b/modules/local/bcftools/view_regions.nf deleted file mode 100644 index 6e697092..00000000 --- a/modules/local/bcftools/view_regions.nf +++ /dev/null @@ -1,52 +0,0 @@ -process BCFTOOLS_VIEW_REGIONS { - tag "$meta.id" - label 'process_low' - - conda "bioconda::bcftools=1.20" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.20--haef29d1_0': - 'biocontainers/bcftools:1.20--haef29d1_0' }" - - input: - tuple val(meta), path(vcf), path(index) - tuple val(meta2), path(bed) - - output: - tuple val(meta), path("*.gz") , emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - //def underscore_regions = region.replaceAll(/[:-]/, "_") - //def output_name = region ? "${prefix}" + "." + "${underscore_regions}" + ".g": "${prefix}" + ".g" - - """ - bcftools view \\ - --output ${prefix}.${bed}.vcf.gz \\ - -R ${bed} \\ - $args \\ - --threads $task.cpus \\ - ${vcf} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/convert_ont_read_names.nf b/modules/local/convert_ont_read_names.nf deleted file mode 100644 index 3c1d8ca7..00000000 --- a/modules/local/convert_ont_read_names.nf +++ /dev/null @@ -1,68 +0,0 @@ -process CONVERT_ONT_READ_NAMES { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::samtools=1.17" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" - - input: - tuple val(meta), path(input), path(index) - - output: - tuple val(meta), path("*.bam"), path("*.bai"), emit: bam_bai - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - input.getExtension() - if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - - // The SED looks for readnames in a BAM file - // and replaces all hyphens - with underscores _, - // then it adds /77923358/ccs/ to the read name - // to make ONT read names look like they come from PacBio. - // - // The first part of the sed command: '/^[^@]/s/-/_/g - // Matches lines that do not start with @ (matches the reads) and replaces all occurrences of - with _. - // - // The second part: ;/^[^@]/s/^([^[:space:]]+)/\\1\\/77923358\\/ccs/' - // Also matches lines that do not start with @, - // and replaces the first sequence of non-space characters (readname) with itself (\\1), followed by /77923358/ccs/. 
- """ - samtools view -x MM,ML --threads ${(task.cpus-1)/2} -h $input |\\ - sed -E '/^[^@]/s/-/_/g;/^[^@]/s/^([^[:space:]]+)/\\1\\/77923358\\/ccs/' |\\ - samtools \\ - view \\ - --threads ${(task.cpus-1)/2} \\ - $args \\ - -o ${prefix}.${file_type} - - samtools index -@ ${task.cpus-1} ${prefix}.${file_type} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.bam - touch ${prefix}.cram - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/create_pedigree_file.nf b/modules/local/create_pedigree_file.nf new file mode 100644 index 00000000..18480df5 --- /dev/null +++ b/modules/local/create_pedigree_file.nf @@ -0,0 +1,51 @@ +process CREATE_PEDIGREE_FILE { + tag "${project}" + label 'process_single' + + conda "conda-forge::python=3.8.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'biocontainers/python:3.8.3' }" + + input: + tuple val(project), val(samples) + + output: + tuple val(project), path("*.ped"), emit: ped + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def out = new File(project + ".ped") + outfile_text = ['#family_id', 'sample_id', 'father', 'mother', 'sex', 'phenotype'].join('\\t') + def samples_list = [] + for(int i = 0; i${project}.ped + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + create_pedigree_file: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch ${project}.ped + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + create_pedigree_file: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/dipcall/enviroment.yml b/modules/local/dipcall/enviroment.yml new file mode 100644 index 00000000..6bfbf1a9 --- /dev/null +++ b/modules/local/dipcall/enviroment.yml @@ -0,0 +1,7 @@ +name: dipcall +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::dipcall=0.3 diff --git a/modules/local/dipcall.nf b/modules/local/dipcall/main.nf similarity index 97% rename from modules/local/dipcall.nf rename to modules/local/dipcall/main.nf index a001e6c4..e19c7641 100644 --- a/modules/local/dipcall.nf +++ b/modules/local/dipcall/main.nf @@ -1,10 +1,9 @@ -//TODO: Mapper choice process DIPCALL { tag "$meta.id" label 'process_high' conda "bioconda::dipcall=0.3" - container "quay.io/biocontainers/dipcall:0.3--hdfd78af_0" + container "biocontainers/dipcall:0.3--hdfd78af_0" // This is bad but I don't know where it's going wrong, a test dataset is really needed for this process // Could change to a subworkflow as well... 
@@ -17,7 +16,7 @@ process DIPCALL { tuple val(meta2), path(reference) tuple val(meta3), path(fai) tuple val(meta4), path(mmi) - path(par) + tuple val(meta5), path(par) output: tuple val(meta), path("*.dip.vcf.gz") , emit: variant_calls @@ -31,7 +30,6 @@ process DIPCALL { tuple val(meta), path("*.pair.vcf.gz"), emit: pair tuple val(meta), path("*.tmp") , emit: tmp, optional: true path "versions.yml" , emit: versions - //TODO: Add all outputs when: task.ext.when == null || task.ext.when diff --git a/modules/local/echtvar/anno/Dockerfile b/modules/local/echtvar/anno/Dockerfile new file mode 100644 index 00000000..a0cdb370 --- /dev/null +++ b/modules/local/echtvar/anno/Dockerfile @@ -0,0 +1,11 @@ +# syntax=docker/dockerfile:1 +FROM ubuntu:22.04 + +USER root + +RUN apt-get update && apt-get install -y procps sed && rm -rf /var/lib/apt/lists/* +# install app +COPY echtvar /usr/bin/echtvar + +# final configuration +ENV FLASK_APP=echtvar diff --git a/modules/local/echtvar/anno/README.md b/modules/local/echtvar/anno/README.md new file mode 100644 index 00000000..b4b06510 --- /dev/null +++ b/modules/local/echtvar/anno/README.md @@ -0,0 +1,8 @@ +This docker was created with the following commands: + +```bash +wget https://github.com/brentp/echtvar/releases/download/v0.2.0/echtvar +chmod +x echtvar +docker build . 
-t fellen31/echtvar:0.2.0 +docker push fellen31/echtvar:0.2.0 +``` diff --git a/modules/local/echtvar/anno/main.nf b/modules/local/echtvar/anno/main.nf index b9522494..c07f27a8 100644 --- a/modules/local/echtvar/anno/main.nf +++ b/modules/local/echtvar/anno/main.nf @@ -2,7 +2,7 @@ process ECHTVAR_ANNO { tag "$meta.id" label 'process_single' - container "docker.io/fellen31/echtvar:latest" + container "docker.io/fellen31/echtvar:0.2.0" input: tuple val(meta), path(vcf) diff --git a/modules/local/echtvar/encode/Dockerfile b/modules/local/echtvar/encode/Dockerfile new file mode 100644 index 00000000..a0cdb370 --- /dev/null +++ b/modules/local/echtvar/encode/Dockerfile @@ -0,0 +1,11 @@ +# syntax=docker/dockerfile:1 +FROM ubuntu:22.04 + +USER root + +RUN apt-get update && apt-get install -y procps sed && rm -rf /var/lib/apt/lists/* +# install app +COPY echtvar /usr/bin/echtvar + +# final configuration +ENV FLASK_APP=echtvar diff --git a/modules/local/echtvar/encode/README.md b/modules/local/echtvar/encode/README.md new file mode 100644 index 00000000..b4b06510 --- /dev/null +++ b/modules/local/echtvar/encode/README.md @@ -0,0 +1,8 @@ +This docker was created with the following commands: + +```bash +wget https://github.com/brentp/echtvar/releases/download/v0.2.0/echtvar +chmod +x echtvar +docker build . 
-t fellen31/echtvar:0.2.0 +docker push fellen31/echtvar:0.2.0 +``` diff --git a/modules/local/echtvar/encode/main.nf b/modules/local/echtvar/encode/main.nf index aedb277d..a022b75c 100644 --- a/modules/local/echtvar/encode/main.nf +++ b/modules/local/echtvar/encode/main.nf @@ -2,7 +2,7 @@ process ECHTVAR_ENCODE { tag "$meta.id" label 'process_single' - container "docker.io/fellen31/echtvar:latest" + container "docker.io/fellen31/echtvar:0.2.0" input: tuple val(meta), path(bcf) @@ -23,12 +23,12 @@ process ECHTVAR_ENCODE { [ { "field": "AF", - "alias": "COHORT_AF", + "alias": "${meta.id}_af", "multiplier": 1000000 }, { "field": "AC", - "alias": "COHORT_AC", + "alias": "${meta.id}_ac", "multiplier": 1000000 }, diff --git a/modules/local/fqcrs/Dockerfile b/modules/local/fqcrs/Dockerfile new file mode 100644 index 00000000..955356d5 --- /dev/null +++ b/modules/local/fqcrs/Dockerfile @@ -0,0 +1,9 @@ +# syntax=docker/dockerfile:1 +FROM ubuntu:22.04 + +RUN apt-get update && apt-get install -y procps sed zstd && rm -rf /var/lib/apt/lists/* +# install app +COPY fqcrs /usr/bin/fqcrs + +# final configuration +ENV FLASK_APP=fqcrs diff --git a/modules/local/fqcrs.nf b/modules/local/fqcrs/main.nf similarity index 100% rename from modules/local/fqcrs.nf rename to modules/local/fqcrs/main.nf diff --git a/modules/local/hiphase/main.nf b/modules/local/hiphase/main.nf index df30576c..44b5d4bf 100644 --- a/modules/local/hiphase/main.nf +++ b/modules/local/hiphase/main.nf @@ -42,7 +42,7 @@ process HIPHASE { vcfInputs.add('--vcf') vcfInputs.add(vcf) vcfOutputs.add('--output-vcf') - vcfOutputs.add("${vcf}.phased.vcf.gz") + vcfOutputs.add("${prefix}.vcf.gz") vcfNames.add(vcf.getName()) } @@ -58,7 +58,7 @@ process HIPHASE { if(output_bam) { bamOutputs.add('--output-bam') - bamOutputs.add("${bam}.haplotagged.bam") + bamOutputs.add("${prefix}.bam") } } diff --git a/modules/local/split_bed_chunks/main.nf b/modules/local/split_bed_chunks/main.nf index 7a3fd8b1..f7073c8b 100644 --- 
a/modules/local/split_bed_chunks/main.nf +++ b/modules/local/split_bed_chunks/main.nf @@ -1,5 +1,5 @@ process SPLIT_BED_CHUNKS { - tag "$meta" + tag "$meta.id" container "quay.io/biocontainers/pandas:1.5.2" def split_bed_chunks_version = "1.0" diff --git a/modules/local/trgt/enviroment.yml b/modules/local/trgt/enviroment.yml new file mode 100644 index 00000000..e6c0944a --- /dev/null +++ b/modules/local/trgt/enviroment.yml @@ -0,0 +1,7 @@ +name: trgt +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::trgt=0.4.0 diff --git a/modules/local/trgt.nf b/modules/local/trgt/main.nf similarity index 89% rename from modules/local/trgt.nf rename to modules/local/trgt/main.nf index 10b3926b..0c493bf4 100644 --- a/modules/local/trgt.nf +++ b/modules/local/trgt/main.nf @@ -2,11 +2,13 @@ process TRGT { tag "$meta.id" label 'process_medium' - container "quay.io/pacbio/trgt:0.4.0" + conda "${moduleDir}/environment.yml" + container "biocontainers/trgt:0.7.0--hdfd78af_0" input: tuple val(meta), path(bam), path(bai), val(sex) tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) path(repeats) output: diff --git a/modules/local/whatshap/haplotag/main.nf b/modules/local/whatshap/haplotag/main.nf index 625a92e1..d4cce45e 100644 --- a/modules/local/whatshap/haplotag/main.nf +++ b/modules/local/whatshap/haplotag/main.nf @@ -23,7 +23,6 @@ process WHATSHAP_HAPLOTAG { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO: Include samtools in container and pipe to samtools view instead """ whatshap haplotag \\ $args \\ diff --git a/modules/local/whatshap/stats/main.nf b/modules/local/whatshap/stats/main.nf index e2b5c3f6..398f3013 100644 --- a/modules/local/whatshap/stats/main.nf +++ b/modules/local/whatshap/stats/main.nf @@ -9,9 +9,9 @@ process WHATSHAP_STATS { tuple val(meta), path(vcf), path(tbi) output: - tuple val(meta), path("*.stats.tsv.gz"), emit: stats - tuple val(meta), path("*.blocks.tsv") , emit: blocks - path 
"versions.yml" , emit: versions + tuple val(meta), path("*.stats.tsv") , emit: stats + tuple val(meta), path("*.blocks.tsv"), emit: blocks + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,7 +23,7 @@ process WHATSHAP_STATS { whatshap stats \\ $args \\ --sample ${meta.id} \\ - --tsv ${prefix}.stats.tsv.gz \\ + --tsv ${prefix}.stats.tsv \\ --block-list ${prefix}.blocks.tsv \\ $vcf diff --git a/modules/nf-core/bcftools/annotate/bcftools-annotate.diff b/modules/nf-core/bcftools/annotate/bcftools-annotate.diff new file mode 100644 index 00000000..391089ff --- /dev/null +++ b/modules/nf-core/bcftools/annotate/bcftools-annotate.diff @@ -0,0 +1,47 @@ +Changes in module 'nf-core/bcftools/annotate' +--- modules/nf-core/bcftools/annotate/meta.yml ++++ modules/nf-core/bcftools/annotate/meta.yml +@@ -34,6 +34,9 @@ + - header_lines: + type: file + description: Contains lines to append to the output VCF header ++ - rename_chrs: ++ type: file ++ description: Contains new and old chromosome names + output: + - meta: + type: map + +--- modules/nf-core/bcftools/annotate/main.nf ++++ modules/nf-core/bcftools/annotate/main.nf +@@ -8,10 +8,9 @@ + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: +- tuple val(meta), path(input), path(index) +- path(annotations) +- path(annotations_index) ++ tuple val(meta), path(input), path(index), path(annotations), path(annotations_index) + path(header_lines) ++ path(rename_chrs) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf +@@ -27,6 +26,7 @@ + def prefix = task.ext.prefix ?: "${meta.id}" + def header_file = header_lines ? "--header-lines ${header_lines}" : '' + def annotations_file = annotations ? "--annotations ${annotations}" : '' ++ def rename_chrs_file = rename_chrs ? "--rename-chrs ${rename_chrs}" : '' + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? 
"bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : +@@ -43,6 +43,7 @@ + $args \\ + $annotations_file \\ + $header_file \\ ++ $rename_chrs_file \\ + --output ${prefix}.${extension} \\ + --threads $task.cpus \\ + $input + +************************************************************ diff --git a/modules/nf-core/bcftools/annotate/environment.yml b/modules/nf-core/bcftools/annotate/environment.yml new file mode 100644 index 00000000..3d4e3379 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_annotate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf new file mode 100644 index 00000000..06eb6d34 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/main.nf @@ -0,0 +1,81 @@ +process BCFTOOLS_ANNOTATE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(input), path(index), path(annotations), path(annotations_index) + path(header_lines) + path(rename_chrs) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def header_file = header_lines ? "--header-lines ${header_lines}" : '' + def annotations_file = annotations ? "--annotations ${annotations}" : '' + def rename_chrs_file = rename_chrs ? 
"--rename-chrs ${rename_chrs}" : '' + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index_command = !index ? "bcftools index $input" : '' + + if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + $index_command + + bcftools \\ + annotate \\ + $args \\ + $annotations_file \\ + $header_file \\ + $rename_chrs_file \\ + --output ${prefix}.${extension} \\ + --threads $task.cpus \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? 
"touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml new file mode 100644 index 00000000..78f806a4 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/meta.yml @@ -0,0 +1,67 @@ +name: bcftools_annotate +description: Add or remove annotations. +keywords: + - bcftools + - annotate + - vcf + - remove + - add +tools: + - annotate: + description: Add or remove annotations. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#annotate + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed + - index: + type: file + description: Index of the query VCF or BCF file + - annotations: + type: file + description: Bgzip-compressed file with annotations + - annotations_index: + type: file + description: Index of the annotations file + - header_lines: + type: file + description: Contains lines to append to the output VCF header + - rename_chrs: + type: file + description: Contains new and old chromosome names +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: Compressed annotated VCF file + pattern: "*{vcf,vcf.gz,bcf,bcf.gz}" + - csi: + type: file + description: Default VCF file index + pattern: "*.csi" + - tbi: + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/annotate/tests/bcf.config b/modules/nf-core/bcftools/annotate/tests/bcf.config new file mode 100644 index 00000000..79d26779 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/bcf.config @@ -0,0 +1,4 @@ +process { + ext.args = "-x ID,INFO/DP,FORMAT/DP --output-type u" + ext.prefix = { "${meta.id}_ann" } +} diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test b/modules/nf-core/bcftools/annotate/tests/main.nf.test new file mode 100644 index 00000000..8cd87abc --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test @@ -0,0 +1,317 @@ +nextflow_process { + + name "Test Process BCFTOOLS_ANNOTATE" + script "../main.nf" + process "BCFTOOLS_ANNOTATE" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/annotate" + + test("sarscov2 - [vcf, tbi] annotation, annotation_tbi, [] - vcf_output") { + + config "./vcf.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + input[1] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + 
process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, []] annotation, annotation_tbi, [] - vcf_output") { + + config "./vcf.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + []] + + input[1] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.versions + ).match() } + ) + } + + } + test("sarscov2 - [vcf, tbi], annotation, annotation_tbi, [] - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + input[1] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], annotation, annotation_tbi, [] - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + input[1] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], annotation, annotation_tbi, [] - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + input[1] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + test("sarscov2 - [vcf, []], annotation, annotation_tbi, header - bcf_output") { + + config "./bcf.config" + + when { + process { + """ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + []]) + + input[1] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + input[3] = Channel.of( + '##INFO=', + '##INFO=' + ).collectFile(name:"headers.vcf", newLine:true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.versions + ).match("bcf") } + ) + } + + } + + test("sarscov2 - [vcf, tbi] annotation, annotation_tbi, [] - stub") { + + config "./vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + input[1] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi] annotation, annotation_tbi, [] - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + input[1] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match()}, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi] annotation, annotation_tbi, [] - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], 
checkIfExists: true)] + + input[1] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi] annotation, annotation_tbi, [] - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + input[1] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap new file mode 100644 index 00000000..8fd8d11d --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap @@ -0,0 +1,388 @@ +{ + "sarscov2 - [vcf, tbi], annotation, annotation_tbi, [] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.tbi" + ] + ], + [ + + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:39:23.802873" + }, + 
"sarscov2 - [vcf, []] annotation, annotation_tbi, [] - vcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:38:57.039285" + }, + "sarscov2 - [vcf, tbi], annotation, annotation_tbi, [] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi" + ] + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:39:15.152697" + }, + "sarscov2 - [vcf, tbi] annotation, annotation_tbi, [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:39:41.994785" + }, + "bcf": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.bcf" + ] + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:39:33.331888" + }, + "sarscov2 - [vcf, tbi] annotation, annotation_tbi, [] - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": 
[ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:40:13.835994" + }, + "sarscov2 - [vcf, tbi] annotation, annotation_tbi, [] - vcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:38:48.368629" + }, + "sarscov2 - [vcf, tbi], annotation, annotation_tbi, [] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi" + ] + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:39:05.608108" + }, + "sarscov2 - [vcf, tbi] annotation, annotation_tbi, [] - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:39:54.842082" + }, + "sarscov2 - [vcf, tbi] annotation, annotation_tbi, [] - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:40:04.074052" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/annotate/tests/tags.yml b/modules/nf-core/bcftools/annotate/tests/tags.yml new file mode 100644 index 00000000..f97a1afc --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/annotate: + - "modules/nf-core/bcftools/annotate/**" diff --git a/modules/nf-core/bcftools/annotate/tests/vcf.config b/modules/nf-core/bcftools/annotate/tests/vcf.config new file mode 100644 index 00000000..611868d5 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/vcf.config @@ -0,0 +1,4 @@ +process { + ext.args = "-x ID,INFO/DP,FORMAT/DP --output-type z" + ext.prefix = { 
"${meta.id}_vcf" } +} diff --git a/modules/nf-core/bcftools/annotate/tests/vcf_gz_index.config b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index.config new file mode 100644 index 00000000..2fd9a225 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.args = "--output-type z --write-index --no-version" + ext.prefix = { "${meta.id}_vcf" } +} diff --git a/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..512c1dfb --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.args = "--output-type z --write-index=csi --no-version" + ext.prefix = { "${meta.id}_vcf" } +} diff --git a/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..7feb5ebb --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.args = "--output-type z --write-index=tbi --no-version" + ext.prefix = { "${meta.id}_vcf" } +} diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test b/modules/nf-core/bcftools/concat/tests/main.nf.test index b42d065e..cea386e3 100644 --- a/modules/nf-core/bcftools/concat/tests/main.nf.test +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test @@ -20,12 +20,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) ] ] """ @@ -51,12 +51,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) ] ] """ @@ -88,12 +88,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) ] ] """ @@ -125,12 +125,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) ] ] """ @@ -163,8 +163,8 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), - 
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) ], [] ] @@ -192,12 +192,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) ] ] """ @@ -224,12 +224,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: 
true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) ] ] """ @@ -257,12 +257,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) ] ] """ @@ -290,12 +290,12 @@ nextflow_process { input[0] = [ [ id:'test3' ], // meta map [ - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) ] ] """ diff --git a/modules/nf-core/bcftools/pluginsplit/environment.yml b/modules/nf-core/bcftools/pluginsplit/environment.yml new file mode 100644 index 00000000..1f7bb1ff --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_pluginsplit +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/pluginsplit/main.nf b/modules/nf-core/bcftools/pluginsplit/main.nf new file mode 100644 index 00000000..082802be --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/main.nf @@ -0,0 +1,79 @@ +process BCFTOOLS_PLUGINSPLIT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + path(samples) + path(groups) + path(regions) + path(targets) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def samples_arg = samples ? "--samples-file ${samples}" : "" + def groups_arg = groups ? "--groups-file ${groups}" : "" + def regions_arg = regions ? "--regions-file ${regions}" : "" + def targets_arg = targets ? "--targets-file ${targets}" : "" + + """ + bcftools plugin split \\ + ${args} \\ + ${vcf} \\ + ${samples_arg} \\ + ${groups_arg} \\ + ${regions_arg} \\ + ${targets_arg} \\ + --output ${prefix} + + mv ${prefix}/* . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def determination_file = samples ?: targets + def create_cmd = extension.matches("vcf|bcf") ? 
"touch " : "echo '' | gzip > " + def create_files = "cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}/' > files.txt; while IFS= read -r filename; do ${create_cmd} \"\$filename\"; done < files.txt" + def create_index = index.matches("csi|tbi") ? "cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}.${index}/' > indices.txt; touch \$( versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/pluginsplit/meta.yml b/modules/nf-core/bcftools/pluginsplit/meta.yml new file mode 100644 index 00000000..64bfce0a --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/meta.yml @@ -0,0 +1,74 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "bcftools_pluginsplit" +description: Split VCF by sample, creating single- or multi-sample VCFs. +keywords: + - split + - vcf + - genomics +tools: + - pluginsplit: + description: | + Split VCF by sample, creating single- or multi-sample VCFs. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: The VCF file to split + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + type: file + description: OPTIONAL - The index of the input VCF/BCF + pattern: "*.tbi" + - samples: + type: file + description: | + A tab-separated file determining which samples should be in which output file + column 1: The sample name(s) in the input file + column 2: The sample name(s) to use in the output file (use `-` to keep the original name) + column 3: The name of the output file + Either this or a groups file should be given + pattern: "*" + - groups: + type: file + description: | + A tab-separated file determining which samples should be in which output file(s) + column 1: The sample name(s) in the input file + column 2: The sample name(s) to use in the output file (use `-` to keep the original name) + column 3: The name of the output file(s) + Either this or a samples file should be given + pattern: "*" + - regions: + type: file + description: A BED file containing regions to use + pattern: "*.bed" + - targets: + type: file + description: A BED file containing regions to use (but streams rather than index-jumps) + pattern: "*.bed" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: The resulting VCF files from the split + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test new file mode 100644 index 00000000..e3160851 --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test @@ -0,0 +1,192 @@ +nextflow_process { + + name "Test Process BCFTOOLS_PLUGINSPLIT" + script "../main.nf" + process "BCFTOOLS_PLUGINSPLIT" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/pluginsplit" + + test("homo_sapiens - [ vcf, tbi ], samples, [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.match).match() } + + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.tbi.get(0).get(1).find { file(it).name.matches("normal.vcf.gz.tbi|tumor.vcf.gz.tbi") }, + ) } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], samples, [], [], [] -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi -stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap new file mode 100644 index 00000000..66c3c1dd --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap @@ -0,0 +1,240 @@ +{ + "homo_sapiens - [ vcf, tbi ], samples, [], [], [] -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "normal.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", + "tumour.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "normal.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", + "tumour.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-09T15:56:42.307673651" + }, + "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "24.04.2" + }, + "timestamp": "2024-07-09T15:56:21.498991402" + }, + "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "40001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "40001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "40001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "40001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-09T15:57:04.483688966" + }, + "homo_sapiens - [ vcf, tbi ], samples, [], [], []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "normal.vcf:md5,69cfc4bf92bf3e2847081a2026a4d3bb", + "tumour.vcf:md5,08fa5c8d5561c2a8d7c300cb0eea1042" + ] + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "normal.vcf:md5,69cfc4bf92bf3e2847081a2026a4d3bb", + "tumour.vcf:md5,08fa5c8d5561c2a8d7c300cb0eea1042" + ] + ] + ], + "versions": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": 
"2024-07-09T15:56:10.033818589" + }, + "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", + "40001.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", + "40001.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-09T15:56:53.641165787" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/pluginsplit/tests/nextflow.config b/modules/nf-core/bcftools/pluginsplit/tests/nextflow.config new file mode 100644 index 00000000..9b9a4783 --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'BCFTOOLS_PLUGINSPLIT' { + ext.args = '--write-index=tbi --output-type z' + } +} diff --git a/modules/nf-core/bcftools/pluginsplit/tests/tags.yml b/modules/nf-core/bcftools/pluginsplit/tests/tags.yml new file mode 100644 index 00000000..2f29ef18 --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/pluginsplit: + - "modules/nf-core/bcftools/pluginsplit/**" diff --git a/modules/nf-core/bcftools/sort/tests/main.nf.test b/modules/nf-core/bcftools/sort/tests/main.nf.test index 1207a739..b9bdd76a 100644 --- a/modules/nf-core/bcftools/sort/tests/main.nf.test +++ b/modules/nf-core/bcftools/sort/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] """ } @@ -39,7 +39,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] """ } @@ -69,7 +69,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] """ } @@ -99,7 +99,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] """ } @@ -127,7 +127,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] """ } @@ -152,7 +152,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] """ } @@ -178,7 +178,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] """ } @@ -204,7 +204,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] """ } diff --git a/modules/nf-core/bcftools/stats/environment.yml b/modules/nf-core/bcftools/stats/environment.yml new file mode 100644 index 00000000..128fe204 --- /dev/null +++ b/modules/nf-core/bcftools/stats/environment.yml @@ -0,0 +1,8 @@ +name: bcftools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf new file mode 100644 index 00000000..20e5da77 --- /dev/null +++ b/modules/nf-core/bcftools/stats/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(regions) + tuple val(meta3), path(targets) + tuple val(meta4), path(samples) + tuple val(meta5), path(exons) + tuple val(meta6), path(fasta) + + output: + tuple val(meta), path("*stats.txt"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def reference_fasta = fasta ? "--fasta-ref ${fasta}" : "" + def exons_file = exons ? 
"--exons ${exons}" : "" + """ + bcftools stats \\ + $args \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $reference_fasta \\ + $exons_file \\ + $vcf > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml new file mode 100644 index 00000000..7ea2103e --- /dev/null +++ b/modules/nf-core/bcftools/stats/meta.yml @@ -0,0 +1,77 @@ +name: bcftools_stats +description: Generates stats from VCF files +keywords: + - variant calling + - stats + - VCF +tools: + - stats: + description: | + Parses VCF or BCF and produces text file stats which is suitable for + machine processing and can be plotted using plot-vcfstats. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF input file + pattern: "*.{vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. Optional: only required when parameter regions is chosen. + pattern: "*.tbi" + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. 
(VCF, BED or tab-delimited) + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files) + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + - exons: + type: file + description: | + Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, optionally bgzip compressed). + e.g. 'exons.tsv.gz' + - fasta: + type: file + description: | + Faidx indexed reference sequence file to determine INDEL context. + e.g. 'reference.fa' +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: Text output file containing stats + pattern: "*_{stats.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test b/modules/nf-core/bcftools/stats/tests/main.nf.test new file mode 100644 index 00000000..be618b0b --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test @@ -0,0 +1,182 @@ +nextflow_process { + + name "Test Process BCFTOOLS_STATS" + script "../main.nf" + process "BCFTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/stats" + + test("sarscov2 - vcf_gz") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.versions).match("versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - regions") { + + when { + process { + """ + input[0] = [ [ id:'regions_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)] + input[1] = [ [id:'regions_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("regions_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - targets") { + + when { + process { + """ + input[0] = [ [ id:'targets_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [id:'targets_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("targets_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - exons") { + + when { + process { + """ + input[0] = [ [ id:'exon_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], 
[] ] + input[3] = [ [], [] ] + input[4] = [ [id: "exon_test"], + file(params.modules_testdata_base_path + 'delete_me/bcftools/stats/exons.tsv.gz', checkIfExists: true) ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("exon_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - reference") { + + when { + process { + """ + input[0] = [ [ id:'ref_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [id: 'ref_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("ref_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + + test("sarscov2 - vcf_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test.snap b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..cd8cff6d --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap @@ -0,0 +1,180 @@ +{ + "sarscov2 - vcf_gz - reference": { + "content": [ + [ + "# 
This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --fasta-ref genome.fasta test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:35.506777837" + }, + "sarscov2 - vcf_gz - exons": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --exons exons.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:30.57486244" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:27.637515559" + }, + "sarscov2 - vcf_gz - targets": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --targets-file test2.targets.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:25.732997442" + }, + "regions_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:32.559884458" + }, + "targets_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-05-31T18:16:37.512009805" + }, + "sarscov2 - vcf_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ], + "stats": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:57:09.614976125" + }, + "exon_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:42.347397266" + }, + "ref_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:47.26823622" + }, + "sarscov2 - vcf_gz": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:27.670416598" + }, + "sarscov2 - vcf_gz - regions": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --regions-file test3.vcf.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:20.759094062" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/bcftools/stats/tests/tags.yml b/modules/nf-core/bcftools/stats/tests/tags.yml new file mode 100644 index 00000000..53c12d92 --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/stats: + - "modules/nf-core/bcftools/stats/**" diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test b/modules/nf-core/bcftools/view/tests/main.nf.test index 14a7283d..1e60c50d 100644 --- a/modules/nf-core/bcftools/view/tests/main.nf.test +++ b/modules/nf-core/bcftools/view/tests/main.nf.test @@ -18,8 +18,8 @@ nextflow_process { """ input[0] = [ [ id:'out', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [] input[2] = [] @@ -49,8 +49,8 @@ nextflow_process { """ input[0] = [ [ id:'out', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [] input[2] = [] @@ -83,8 +83,8 @@ nextflow_process { """ input[0] = [ [ id:'out', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [] input[2] = [] @@ -117,8 +117,8 @@ nextflow_process { """ input[0] = [ [ id:'out', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [] input[2] = [] @@ -151,11 +151,11 @@ nextflow_process { """ input[0] = [ [ id:'out', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] - input[1] = file(params.test_data['sarscov2']['illumina']['test3_vcf_gz'], checkIfExists: true) - input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_targets_tsv_gz'], checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) input[3] = [] """ } @@ -183,8 +183,8 @@ nextflow_process { """ input[0] = [ [ id:'out', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [] input[2] = [] @@ -215,8 +215,8 @@ nextflow_process { """ input[0] = [ [ id:'out', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [] input[2] = [] @@ -245,8 +245,8 @@ nextflow_process { """ input[0] = [ [ id:'out', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [] input[2] = [] @@ -275,8 +275,8 @@ nextflow_process { """ input[0] = [ [ id:'out', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [] input[2] = [] diff --git a/modules/nf-core/bedtools/merge/environment.yml b/modules/nf-core/bedtools/merge/environment.yml new file mode 100644 index 00000000..99707878 --- /dev/null +++ 
b/modules/nf-core/bedtools/merge/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf new file mode 100644 index 00000000..5310647d --- /dev/null +++ b/modules/nf-core/bedtools/merge/main.nf @@ -0,0 +1,47 @@ +process BEDTOOLS_MERGE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bed) + + output: + tuple val(meta), path('*.bed'), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + merge \\ + -i $bed \\ + $args \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml new file mode 100644 index 00000000..d7463e3d --- /dev/null +++ b/modules/nf-core/bedtools/merge/meta.yml @@ -0,0 +1,45 @@ +name: bedtools_merge +description: combines overlapping or “book-ended” features in an interval file into a single feature which spans all of the combined features. 
+keywords: + - bed + - merge + - bedtools + - overlapped bed +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/merge.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Input BED file + pattern: "*.{bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Overlapped bed file with combined features + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/merge/tests/main.nf.test b/modules/nf-core/bedtools/merge/tests/main.nf.test new file mode 100644 index 00000000..95dba8e5 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process BEDTOOLS_MERGE" + script "../main.nf" + config "./nextflow.config" + process "BEDTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/merge" + + test("test_bedtools_merge") { + + when { + process { + """ + input[0] = [ [ id:'test'], + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/merge/tests/main.nf.test.snap b/modules/nf-core/bedtools/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..ee6c4e63 --- /dev/null +++ 
b/modules/nf-core/bedtools/merge/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_bedtools_merge": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e" + ] + ], + "1": [ + "versions.yml:md5,2d134badb4cd1e4e903696c7967f28d6" + ], + "bed": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e" + ] + ], + "versions": [ + "versions.yml:md5,2d134badb4cd1e4e903696c7967f28d6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T17:07:09.721153" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/merge/tests/nextflow.config b/modules/nf-core/bedtools/merge/tests/nextflow.config new file mode 100644 index 00000000..16444e98 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: BEDTOOLS_MERGE { + ext.prefix = { "${meta.id}_out" } + } + +} diff --git a/modules/nf-core/bedtools/merge/tests/tags.yml b/modules/nf-core/bedtools/merge/tests/tags.yml new file mode 100644 index 00000000..60c8cad1 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/merge: + - "modules/nf-core/bedtools/merge/**" diff --git a/modules/nf-core/bedtools/sort/environment.yml b/modules/nf-core/bedtools/sort/environment.yml new file mode 100644 index 00000000..87b2e425 --- /dev/null +++ b/modules/nf-core/bedtools/sort/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/sort/main.nf b/modules/nf-core/bedtools/sort/main.nf new file mode 100644 index 00000000..b833150a --- /dev/null +++ b/modules/nf-core/bedtools/sort/main.nf @@ -0,0 +1,54 @@ +process BEDTOOLS_SORT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(intervals) + path genome_file + + output: + tuple val(meta), path("*.${extension}"), emit: sorted + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def genome_cmd = genome_file ? "-g $genome_file" : "" + extension = task.ext.suffix ?: intervals.extension + if ("$intervals" == "${prefix}.${extension}") { + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + } + """ + bedtools \\ + sort \\ + -i $intervals \\ + $genome_cmd \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: intervals.extension + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/sort/meta.yml b/modules/nf-core/bedtools/sort/meta.yml new file mode 100644 index 00000000..7c915f5f --- /dev/null +++ b/modules/nf-core/bedtools/sort/meta.yml @@ -0,0 +1,54 @@ +name: bedtools_sort +description: Sorts a feature file by chromosome and other criteria. +keywords: + - bed + - sort + - bedtools + - chromosome +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. 
+ documentation: https://bedtools.readthedocs.io/en/latest/content/tools/sort.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: BED/BEDGRAPH + pattern: "*.{bed|bedGraph}" + - genome_file: + type: file + description: | + Optional reference genome 2 column file that defines the expected chromosome order. + pattern: "*.{fai,txt,chromsizes}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sorted: + type: file + description: Sorted output file + pattern: "*.${extension}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@chris-cheshire" + - "@adamrtalbot" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@chris-cheshire" + - "@adamrtalbot" diff --git a/modules/nf-core/bedtools/sort/tests/main.nf.test b/modules/nf-core/bedtools/sort/tests/main.nf.test new file mode 100644 index 00000000..b1f36dd9 --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process BEDTOOLS_SORT" + script "../main.nf" + config "./nextflow.config" + process "BEDTOOLS_SORT" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/sort" + + test("test_bedtools_sort") { + + when { + process { + """ + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + + test("test_bedtools_sort_with_genome") { + + when { + process { + """ + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/sort/tests/main.nf.test.snap b/modules/nf-core/bedtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..f10e8b98 --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "test_bedtools_sort_with_genome": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "1": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "versions": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T10:13:11.830452" + }, + "test_bedtools_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "1": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "versions": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T10:16:40.535947" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/sort/tests/nextflow.config b/modules/nf-core/bedtools/sort/tests/nextflow.config new file mode 100644 index 00000000..f203c99c --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: BEDTOOLS_SORT { + 
ext.prefix = { "${meta.id}_out" } + ext.suffix = "testtext" + } + +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/sort/tests/tags.yml b/modules/nf-core/bedtools/sort/tests/tags.yml new file mode 100644 index 00000000..47c85eea --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/sort: + - "modules/nf-core/bedtools/sort/**" diff --git a/modules/nf-core/cadd/cadd.diff b/modules/nf-core/cadd/cadd.diff new file mode 100644 index 00000000..2ee51723 --- /dev/null +++ b/modules/nf-core/cadd/cadd.diff @@ -0,0 +1,22 @@ +Changes in module 'nf-core/cadd' +--- modules/nf-core/cadd/main.nf ++++ modules/nf-core/cadd/main.nf +@@ -7,13 +7,14 @@ + + containerOptions { + (workflow.containerEngine == 'singularity') ? +- "-B ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations" : +- "-v ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations" ++ "-B ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -B ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored" : ++ "-v ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -v ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored" + } + + input: + tuple val(meta), path(vcf) + path(annotation_dir) ++ path(prescored_dir) + + output: + tuple val(meta), path("*.tsv.gz"), emit: tsv + +************************************************************ diff --git a/modules/nf-core/cadd/environment.yml b/modules/nf-core/cadd/environment.yml new file mode 100644 index 00000000..26f170e9 --- /dev/null +++ b/modules/nf-core/cadd/environment.yml @@ -0,0 +1,9 @@ +name: cadd +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cadd-scripts=1.6.post1 + - conda-forge::conda=4.14.0 + - conda-forge::mamba=1.4.0 diff --git a/modules/nf-core/cadd/main.nf b/modules/nf-core/cadd/main.nf new file mode 100644 index 00000000..52490c64 --- /dev/null +++ b/modules/nf-core/cadd/main.nf @@ -0,0 +1,54 @@ +process CADD { + tag "$meta.id" + 
label 'process_medium' + + conda "${moduleDir}/environment.yml" + container 'docker.io/biocontainers/cadd-scripts-with-envs:1.6.post1_cv1' + + containerOptions { + (workflow.containerEngine == 'singularity') ? + "-B ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -B ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored" : + "-v ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -v ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored" + } + + input: + tuple val(meta), path(vcf) + path(annotation_dir) + path(prescored_dir) + + output: + tuple val(meta), path("*.tsv.gz"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.6.post1" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + """ + cadd.sh \\ + -o ${prefix}.tsv.gz \\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cadd: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.6.post1" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + """ + touch ${prefix}.tsv.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cadd: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/cadd/meta.yml b/modules/nf-core/cadd/meta.yml new file mode 100644 index 00000000..230ed9c0 --- /dev/null +++ b/modules/nf-core/cadd/meta.yml @@ -0,0 +1,49 @@ +name: "cadd" +description: CADD is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletions variants in the human genome. 
+keywords: + - cadd + - annotate + - variants +tools: + - "cadd": + description: "CADD scripts release for offline scoring" + homepage: "https://cadd.gs.washington.edu/" + documentation: "https://github.com/kircherlab/CADD-scripts/blob/master/README.md" + tool_dev_url: "https://github.com/kircherlab/CADD-scripts/" + doi: "10.1093/nar/gky1016" + licence: + - Restricted. Free for non-commercial users. +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Input file for annotation in vcf or vcf.gz format + pattern: "*.{vcf,vcf.gz}" + - annotation_dir: + type: file + description: | + Path to folder containing the vcf files with precomputed CADD scores. + This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation. + pattern: "*.{vcf,vcf.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tsv: + type: file + description: Annotated tsv file + pattern: "*.{tsv,tsv.gz}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index f132b2ad..b68e5f91 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -53,9 +53,9 @@ process CAT_FASTQ { def prefix = task.ext.prefix ?: "${meta.id}" def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()] if (meta.single_end) { - if (readList.size > 1) { + if (readList.size >= 1) { """ - touch ${prefix}.merged.fastq.gz + echo '' | gzip > ${prefix}.merged.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -64,10 +64,10 @@ process CAT_FASTQ { """ } } else { - if (readList.size > 2) { + if (readList.size >= 2) { """ - touch ${prefix}_1.merged.fastq.gz - touch ${prefix}_2.merged.fastq.gz + echo '' | gzip > ${prefix}_1.merged.fastq.gz + echo '' | gzip > ${prefix}_2.merged.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test index a71dcb8d..f88a78b6 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -13,9 +13,6 @@ nextflow_process { test("test_cat_fastq_single_end") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -38,9 +35,6 @@ nextflow_process { test("test_cat_fastq_paired_end") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -65,9 +59,6 @@ nextflow_process { test("test_cat_fastq_single_end_same_name") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -90,9 +81,6 @@ nextflow_process { test("test_cat_fastq_paired_end_same_name") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -117,9 +105,129 @@ nextflow_process { test("test_cat_fastq_single_end_single_file") { when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end - 
stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file - stub") { + + options "-stub" + + when { process { """ input[0] = Channel.of([ diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap index 43dfe28f..aec119a9 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -28,6 +28,10 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-01-17T17:30:39.816981" }, "test_cat_fastq_single_end_same_name": { @@ -59,6 +63,10 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-01-17T17:32:35.229332" }, "test_cat_fastq_single_end_single_file": { @@ -90,6 +98,10 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-01-17T17:34:00.058829" }, "test_cat_fastq_paired_end_same_name": { @@ -127,8 +139,123 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-01-17T17:33:33.031555" }, + "test_cat_fastq_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:28.244999" + }, + "test_cat_fastq_paired_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:57.070911" + }, + "test_cat_fastq_single_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:46.796254" + }, "test_cat_fastq_paired_end": { "content": [ { @@ -164,6 +291,86 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-01-17T17:32:02.270935" + }, + "test_cat_fastq_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + 
"test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:37.807553" + }, + "test_cat_fastq_single_end_single_file - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:14:51.861264" } } \ No newline at end of file diff --git a/modules/nf-core/deepvariant/README.md b/modules/nf-core/deepvariant/README.md index ca112a7d..9d1ceb34 100644 --- a/modules/nf-core/deepvariant/README.md +++ b/modules/nf-core/deepvariant/README.md @@ -1,6 +1,8 @@ # Conda is not supported at the moment -The [bioconda](https://bioconda.github.io/recipes/deepvariant/README.html) recipe is not fully working as expected +The [bioconda](https://bioconda.github.io/recipes/deepvariant/README.html) recipe is not fully working as expected. + +See https://github.com/bioconda/bioconda-recipes/issues/30310 and https://github.com/nf-core/modules/issues/1754 for more information. 
Hence, we are using the docker container provided by the authors of the tool: diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf index 507b6c11..8d3d0911 100644 --- a/modules/nf-core/deepvariant/main.nf +++ b/modules/nf-core/deepvariant/main.nf @@ -2,14 +2,17 @@ process DEEPVARIANT { tag "$meta.id" label 'process_high' - //Conda is not supported at the moment - container "nf-core/deepvariant:1.5.0" + // FIXME Conda is not supported at the moment + // BUG https://github.com/nf-core/modules/issues/1754 + // BUG https://github.com/bioconda/bioconda-recipes/issues/30310 + container "nf-core/deepvariant:1.6.1" input: tuple val(meta), path(input), path(index), path(intervals) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) tuple val(meta4), path(gzi) + tuple val(meta5), path(par_bed) output: tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf @@ -29,6 +32,10 @@ process DEEPVARIANT { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def regions = intervals ? "--regions=${intervals}" : "" + def par_regions = par_bed ? "--par_regions_bed=${par_bed}" : "" + // WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755 + // FIXME Revert this on next version bump + def VERSION = '1.6.1' """ /opt/deepvariant/bin/run_deepvariant \\ @@ -38,12 +45,13 @@ process DEEPVARIANT { --output_gvcf=${prefix}.g.vcf.gz \\ ${args} \\ ${regions} \\ - --intermediate_results_dir=. \\ + ${par_regions} \\ + --intermediate_results_dir=tmp \\ --num_shards=${task.cpus} cat <<-END_VERSIONS > versions.yml "${task.process}": - deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' ) + deepvariant: $VERSION END_VERSIONS """ @@ -53,6 +61,9 @@ process DEEPVARIANT { error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." 
} prefix = task.ext.prefix ?: "${meta.id}" + // WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755 + // FIXME Revert this on next version bump + def VERSION = '1.6.1' """ touch ${prefix}.vcf.gz touch ${prefix}.vcf.gz.tbi @@ -61,7 +72,7 @@ process DEEPVARIANT { cat <<-END_VERSIONS > versions.yml "${task.process}": - deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' ) + deepvariant: $VERSION END_VERSIONS """ } diff --git a/modules/nf-core/deepvariant/meta.yml b/modules/nf-core/deepvariant/meta.yml index a50dc57d..2327dd5f 100644 --- a/modules/nf-core/deepvariant/meta.yml +++ b/modules/nf-core/deepvariant/meta.yml @@ -57,6 +57,15 @@ input: type: file description: GZI index of reference fasta file pattern: "*.gzi" + - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - par_bed: + type: file + description: BED file containing PAR regions + pattern: "*.bed" output: - meta: type: map diff --git a/modules/nf-core/deepvariant/tests/main.nf.test b/modules/nf-core/deepvariant/tests/main.nf.test index 91612c1e..17765233 100644 --- a/modules/nf-core/deepvariant/tests/main.nf.test +++ b/modules/nf-core/deepvariant/tests/main.nf.test @@ -31,6 +31,9 @@ nextflow_process { input[3] = [ [],[] ] + input[4] = [ + [],[] + ] """ } } @@ -66,6 +69,48 @@ nextflow_process { input[3] = [ [],[] ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed") { + config "./nextflow-non-autosomal-calling.config" + tag "test" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true) + ] """ } } @@ -102,6 +147,9 @@ nextflow_process { [ id:'genome'], file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true) ] + input[4] = [ + [],[] + ] """ } } diff --git a/modules/nf-core/deepvariant/tests/main.nf.test.snap b/modules/nf-core/deepvariant/tests/main.nf.test.snap index 6ad76ae4..04f87774 100644 --- a/modules/nf-core/deepvariant/tests/main.nf.test.snap +++ b/modules/nf-core/deepvariant/tests/main.nf.test.snap @@ -1,269 +1,358 @@ { - "homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai": { - "content": [ - { - "0": [ - [ + "homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai": { + "content": [ { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "4": [ + 
"versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "gvcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "vcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "versions": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ] + } ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "4": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ], - "gvcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] - ], - "gvcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "vcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] - ], - "vcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] - ], - "versions": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-01T12:09:40.987117305" }, - "timestamp": "2024-03-20T13:54:42.757335334" - }, - "homo_sapiens - [bam, bai] 
- fasta - fai": { - "content": [ - { - "0": [ - [ + "homo_sapiens - [bam, bai] - fasta - fai": { + "content": [ { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "4": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "gvcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "vcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "versions": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ] + } ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "4": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ], - "gvcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - 
] - ], - "gvcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "vcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] - ], - "vcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] - ], - "versions": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-01T12:08:47.058887374" }, - "timestamp": "2024-03-20T13:54:18.409489045" - }, - "homo_sapiens - [cram, crai, genome_bed] - fasta - fai": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] - ], - "2": [ - [ + "homo_sapiens - [cram, crai, genome_bed] - fasta - fai": { + "content": [ { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "4": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "gvcf_tbi": [ + [ + { + "id": 
"test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "vcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "versions": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ] + } ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "4": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ], - "gvcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] - ], - "gvcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "vcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] - ], - "vcf_tbi": [ - [ + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-01T12:09:13.952808655" + }, + "homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed": { + "content": [ { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2" + ] + ], + "4": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ], + "gvcf": [ + [ + { + "id": "test", 
+ "single_end": false + }, + "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a" + ] + ], + "gvcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c" + ] + ], + "vcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3" + ] + ], + "versions": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ] + } ], - "versions": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T13:54:30.523871801" - } + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T14:29:24.939680679" + } } diff --git a/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config new file mode 100644 index 00000000..4be8986b --- /dev/null +++ b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config @@ -0,0 +1,8 @@ +process { + + withName: DEEPVARIANT { + ext.args = '--model_type=WGS --haploid_contigs chr22' + ext.prefix = { "${meta.id}_out" } + } + +} diff --git a/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff b/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff new file mode 100644 index 00000000..8626a663 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff @@ -0,0 +1,25 @@ +Changes in module 'nf-core/ensemblvep/vep' +--- modules/nf-core/ensemblvep/vep/main.nf ++++ modules/nf-core/ensemblvep/vep/main.nf +@@ -4,8 +4,8 @@ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+- 'https://depot.galaxyproject.org/singularity/ensembl-vep:111.0--pl5321h2a3209d_0' : +- 'biocontainers/ensembl-vep:111.0--pl5321h2a3209d_0' }" ++ 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : ++ 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), path(vcf), path(custom_extra_files) + +--- modules/nf-core/ensemblvep/vep/environment.yml ++++ modules/nf-core/ensemblvep/vep/environment.yml +@@ -4,4 +4,4 @@ + - bioconda + - defaults + dependencies: +- - bioconda::ensembl-vep=111.0 ++ - bioconda::ensembl-vep=110.0 + +************************************************************ diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml new file mode 100644 index 00000000..7a127746 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/environment.yml @@ -0,0 +1,7 @@ +name: ensemblvep_vep +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=110.0 diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf index da0e3646..c3e5d53e 100644 --- a/modules/nf-core/ensemblvep/vep/main.nf +++ b/modules/nf-core/ensemblvep/vep/main.nf @@ -2,7 +2,7 @@ process ENSEMBLVEP_VEP { tag "$meta.id" label 'process_medium' - conda "bioconda::ensembl-vep=110.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" @@ -20,7 +20,7 @@ process ENSEMBLVEP_VEP { tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf tuple val(meta), path("*.tab.gz") , optional:true, emit: tab tuple val(meta), path("*.json.gz") , optional:true, emit: json - path "*.summary.html" , emit: report + path "*.html" , optional:true, emit: report path "versions.yml" , emit: versions when: @@ -45,8 +45,7 @@ process ENSEMBLVEP_VEP { --cache \\ --cache_version $cache_version \\ --dir_cache $dir_cache \\ - --fork $task.cpus \\ - --stats_file ${prefix}.summary.html \\ + --fork $task.cpus cat <<-END_VERSIONS > versions.yml @@ -58,10 +57,10 @@ process ENSEMBLVEP_VEP { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.vcf.gz - touch ${prefix}.tab.gz - touch ${prefix}.json.gz - touch ${prefix}.summary.html + echo "" | gzip > ${prefix}.vcf.gz + echo "" | gzip > ${prefix}.tab.gz + echo "" | gzip > ${prefix}.json.gz + touch ${prefix}_summary.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/ensemblvep/vep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml index 7783847d..d8ff8d14 100644 --- a/modules/nf-core/ensemblvep/vep/meta.yml +++ b/modules/nf-core/ensemblvep/vep/meta.yml @@ -1,4 +1,4 @@ -name: ENSEMBLVEP_VEP +name: ensemblvep_vep description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`. 
keywords: - annotation @@ -86,3 +86,7 @@ authors: - "@maxulysse" - "@matthdsm" - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test new file mode 100644 index 00000000..4aff84a3 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test @@ -0,0 +1,114 @@ +nextflow_process { + + name "Test Process ENSEMBLVEP_VEP" + script "../main.nf" + process "ENSEMBLVEP_VEP" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ensemblvep" + tag "ensemblvep/vep" + tag "ensemblvep/download" + + test("test_ensemblvep_vep_fasta_vcf") { + config "./vcf.config" + + setup { + run("ENSEMBLVEP_DOWNLOAD") { + script "../../download/main.nf" + + process { + """ + input[0] = Channel.of([ + [id:"111_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ]) + input[1] = params.vep_genome + input[2] = params.vep_species + input[3] = params.vep_cache_version + input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } + input[5] = Channel.value([ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[6] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") } + ) + } + + } + + test("test_ensemblvep_vep_fasta_tab_gz") { + config "./tab.gz.config" + + setup { + run("ENSEMBLVEP_DOWNLOAD") { + script "../../download/main.nf" + + process { + """ + input[0] = Channel.of([ + [id:"111_WBcel235"], + params.vep_genome, + params.vep_species, + 
params.vep_cache_version + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ]) + input[1] = params.vep_genome + input[2] = params.vep_species + input[3] = params.vep_cache_version + input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } + input[5] = Channel.value([ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[6] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v111.0") } + ) + } + } +} diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap new file mode 100644 index 00000000..f937b299 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "test_ensemblvep_vep_fasta_tab_gz": { + "content": [ + [ + "versions.yml:md5,bd2ba1b4741a7d0a224160b50859f4ba" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-04-15T17:35:20.694114" + }, + "test_ensemblvep_vep_fasta_vcf": { + "content": [ + [ + "versions.yml:md5,bd2ba1b4741a7d0a224160b50859f4ba" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-04-15T17:34:41.093843" + } +} \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config new file mode 100644 index 00000000..882bce41 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/nextflow.config @@ -0,0 +1,12 @@ +params { + vep_cache_version = "111" + vep_genome = "WBcel235" + vep_species = "caenorhabditis_elegans" +} + +process { + withName: 
ENSEMBLVEP_DOWNLOAD { + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + ext.prefix = { "${params.vep_cache_version}_${params.vep_genome}" } + } +} diff --git a/modules/nf-core/ensemblvep/vep/tests/tab.gz.config b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config new file mode 100644 index 00000000..40eb03e5 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config @@ -0,0 +1,5 @@ +process { + withName: ENSEMBLVEP_VEP { + ext.args = '--tab --compress_output bgzip' + } +} diff --git a/modules/nf-core/ensemblvep/vep/tests/tags.yml b/modules/nf-core/ensemblvep/vep/tests/tags.yml new file mode 100644 index 00000000..4aa4aa45 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/tags.yml @@ -0,0 +1,2 @@ +ensemblvep/vep: + - "modules/nf-core/ensemblvep/vep/**" diff --git a/modules/nf-core/ensemblvep/vep/tests/vcf.config b/modules/nf-core/ensemblvep/vep/tests/vcf.config new file mode 100644 index 00000000..ad8955a3 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/vcf.config @@ -0,0 +1,5 @@ +process { + withName: ENSEMBLVEP_VEP { + ext.args = '--vcf' + } +} diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf deleted file mode 100644 index 4fc19b74..00000000 --- a/modules/nf-core/fastp/main.nf +++ /dev/null @@ -1,120 +0,0 @@ -process FASTP { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : - 'biocontainers/fastp:0.23.4--h5f740d0_0' }" - - input: - tuple val(meta), path(reads) - path adapter_fasta - val save_trimmed_fail - val save_merged - - output: - tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads - tuple val(meta), path('*.json') , emit: json - tuple val(meta), path('*.html') , emit: html - tuple val(meta), path('*.log') , emit: log - path "versions.yml" , emit: versions - tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail - tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" - def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' - // Added soft-links to original fastqs for consistent naming in MultiQC - // Use single ended for interleaved. Add --interleaved_in in config. - if ( task.ext.args?.contains('--interleaved_in') ) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz - - fastp \\ - --stdout \\ - --in1 ${prefix}.fastq.gz \\ - --thread $task.cpus \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $adapter_list \\ - $fail_fastq \\ - $args \\ - 2> >(tee ${prefix}.fastp.log >&2) \\ - | gzip -c > ${prefix}.fastp.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } else if (meta.single_end) { - """ - [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz - - fastp \\ - --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.fastp.fastq.gz \\ - --thread $task.cpus \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $adapter_list \\ - $fail_fastq \\ - $args \\ - 2> >(tee ${prefix}.fastp.log >&2) - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } else { - def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz - fastp \\ - --in1 ${prefix}_1.fastq.gz \\ - --in2 ${prefix}_2.fastq.gz \\ - --out1 ${prefix}_1.fastp.fastq.gz \\ - --out2 ${prefix}_2.fastp.fastq.gz \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $adapter_list \\ - $fail_fastq \\ - $merge_fastq \\ - --thread $task.cpus \\ - --detect_adapter_for_pe \\ - $args \\ - 2> >(tee ${prefix}.fastp.log >&2) - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end - def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" - def touch_merged = (!is_single_output && save_merged) ? 
"touch ${prefix}.merged.fastq.gz" : "" - """ - touch $touch_reads - touch "${prefix}.fastp.json" - touch "${prefix}.fastp.html" - touch "${prefix}.fastp.log" - $touch_merged - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml deleted file mode 100644 index c22a16ab..00000000 --- a/modules/nf-core/fastp/meta.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: fastp -description: Perform adapter/quality trimming on sequencing reads -keywords: - - trimming - - quality control - - fastq -tools: - - fastp: - description: | - A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. - documentation: https://github.com/OpenGene/fastp - doi: 10.1093/bioinformatics/bty560 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. If you wish to run interleaved paired-end data, supply as single-end data - but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. - - adapter_fasta: - type: file - description: File in FASTA format containing possible adapters to remove. 
- pattern: "*.{fasta,fna,fas,fa}" - - save_trimmed_fail: - type: boolean - description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` - - save_merged: - type: boolean - description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: The trimmed/modified/unmerged fastq reads - pattern: "*fastp.fastq.gz" - - json: - type: file - description: Results in JSON format - pattern: "*.json" - - html: - type: file - description: Results in HTML format - pattern: "*.html" - - log: - type: file - description: fastq log file - pattern: "*.log" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - reads_fail: - type: file - description: Reads the failed the preprocessing - pattern: "*fail.fastq.gz" - - reads_merged: - type: file - description: Reads that were successfully merged - pattern: "*.{merged.fastq.gz}" -authors: - - "@drpatelh" - - "@kevinmenden" -maintainers: - - "@drpatelh" - - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test deleted file mode 100644 index 6f1f4897..00000000 --- a/modules/nf-core/fastp/tests/main.nf.test +++ /dev/null @@ -1,725 +0,0 @@ -nextflow_process { - - name "Test Process FASTP" - script "../main.nf" - process "FASTP" - tag "modules" - tag "modules_nfcore" - tag "fastp" - - test("test_fastp_single_end") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - 
input[3] = save_merged - """ - } - } - - then { - def html_text = [ "Q20 bases:12.922000 K (92.984097%)", - "single end (151 cycles)" ] - def log_text = [ "Q20 bases: 12922(92.9841%)", - "reads passed filter: 99" ] - def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_single_end-_match") - }, - { assert snapshot(process.out.versions).match("versions_single_end") } - ) - } - } - - test("test_fastp_single_end-stub") { - - options '-stub' - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - 
input[3] = save_merged - """ - } - } - - then { - - assertAll( - { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_single_end-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_single_end_stub") } - ) - } - } - - test("test_fastp_paired_end") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ "Q20 bases:25.719000 K (93.033098%)", - "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] - def log_text = [ "No adapter detected for read1", - "Q30 bases: 12281(88.3716%)"] - def json_text = ['"passed_filter_reads": 198'] - def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } 
- } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end") } - ) - } - } - - test("test_fastp_paired_end-stub") { - - options '-stub' - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - assertAll( - { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - 
process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end-stub") } - ) - } - } - - test("fastp test_fastp_interleaved") { - - config './nextflow.interleaved.config' - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:true ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ "Q20 bases:25.719000 K (93.033098%)", - "paired end (151 cycles + 151 cycles)"] - def log_text = [ "Q20 bases: 12922(92.9841%)", - "reads passed filter: 162"] - def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { 
file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_interleaved-_match") - }, - { assert snapshot(process.out.versions).match("versions_interleaved") } - ) - } - } - - test("fastp test_fastp_interleaved-stub") { - - options '-stub' - - config './nextflow.interleaved.config' - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:true ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - assertAll( - { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_interleaved-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_interleaved-stub") } - ) - } - } - - test("test_fastp_single_end_trim_fail") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = true - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:true ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', 
checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ "Q20 bases:12.922000 K (92.984097%)", - "single end (151 cycles)"] - def log_text = [ "Q20 bases: 12922(92.9841%)", - "reads passed filter: 99" ] - def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } - } - }, - { failed_read_lines.each { failed_read_line -> - { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { assert snapshot(process.out.versions).match("versions_single_end_trim_fail") } - ) - } - } - - test("test_fastp_paired_end_trim_fail") { - - config './nextflow.save_failed.config' - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = true - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ 
"Q20 bases:25.719000 K (93.033098%)", - "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] - def log_text = [ "No adapter detected for read1", - "Q30 bases: 12281(88.3716%)"] - def json_text = ['"passed_filter_reads": 162'] - def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { failed_read2_lines.each { failed_read2_line -> - { assert path(process.out.reads_fail.get(0).get(1).get(2)).linesGzip.contains(failed_read2_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") } - ) - } - } - - test("test_fastp_paired_end_merged") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = true - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta 
- input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ "
"] - def log_text = [ "Merged and filtered:", - "total reads: 75", - "total bases: 13683"] - def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] - def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", - "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", - "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { read_merged_lines.each { read_merged_line -> - { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end_merged_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end_merged") } - ) - } - } - - test("test_fastp_paired_end_merged-stub") { - - options '-stub' - - when { - params { - outdir = 
"$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = true - - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - assertAll( - { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end_merged-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") } - ) - } - } - - test("test_fastp_paired_end_merged_adapterlist") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) - save_trimmed_fail = false - save_merged = true - - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ "
"] - def log_text = [ "Merged and filtered:", - "total reads: 75", - "total bases: 13683"] - def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] - def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", - "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", - "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { read_merged_lines.each { read_merged_line -> - { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") } - ) - } - } -} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap deleted file mode 100644 index 3e876288..00000000 --- a/modules/nf-core/fastp/tests/main.nf.test.snap +++ /dev/null @@ -1,330 +0,0 @@ -{ - "fastp test_fastp_interleaved_json": { - "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-18T16:19:15.063001" - }, - 
"test_fastp_paired_end_merged-for_stub_match": { - "content": [ - [ - [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "test.merged.fastq.gz", - "{id=test, single_end=false}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T18:10:13.467574" - }, - "versions_interleaved": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:56:24.615634793" - }, - "test_fastp_single_end_json": { - "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-18T16:18:43.526412" - }, - "versions_paired_end": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:55:42.333545689" - }, - "test_fastp_paired_end_match": { - "content": [ - [ - [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=false}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T12:03:06.431833729" - }, - "test_fastp_interleaved-_match": { - "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-18T16:19:15.111894" - }, - "test_fastp_paired_end_merged_match": { - "content": [ - [ - [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "test.merged.fastq.gz", - "{id=test, single_end=false}" - ] - ], - "meta": { - 
"nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T12:08:44.496251446" - }, - "versions_single_end_stub": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:55:27.354051299" - }, - "versions_interleaved-stub": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:56:46.535528418" - }, - "versions_single_end_trim_fail": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:59:03.724591407" - }, - "test_fastp_paired_end-for_stub_match": { - "content": [ - [ - [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=false}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T18:07:15.398827" - }, - "versions_paired_end-stub": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:56:06.50017282" - }, - "versions_single_end": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:55:07.67921647" - }, - "versions_paired_end_merged_stub": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:59:47.350653154" - }, - "test_fastp_interleaved-for_stub_match": { - "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, 
single_end=true}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T18:08:06.127974" - }, - "versions_paired_end_trim_fail": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:59:18.140484878" - }, - "test_fastp_single_end-for_stub_match": { - "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T18:06:00.244202" - }, - "test_fastp_single_end-_match": { - "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-18T16:18:43.580336" - }, - "versions_paired_end_merged_adapterlist": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T12:05:37.845370554" - }, - "versions_paired_end_merged": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:59:32.860543858" - }, - "test_fastp_single_end_trim_fail_json": { - "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T18:08:41.942317" - } -} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config deleted file mode 100644 index 4be8dbd2..00000000 --- 
a/modules/nf-core/fastp/tests/nextflow.interleaved.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: FASTP { - ext.args = "--interleaved_in -e 30" - } -} diff --git a/modules/nf-core/fastp/tests/nextflow.save_failed.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config deleted file mode 100644 index 53b61b0c..00000000 --- a/modules/nf-core/fastp/tests/nextflow.save_failed.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: FASTP { - ext.args = "-e 30" - } -} diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml deleted file mode 100644 index c1afcce7..00000000 --- a/modules/nf-core/fastp/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -fastp: - - modules/nf-core/fastp/** diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/gawk/environment.yml similarity index 62% rename from modules/nf-core/fastp/environment.yml rename to modules/nf-core/gawk/environment.yml index 70389e66..3d98a08b 100644 --- a/modules/nf-core/fastp/environment.yml +++ b/modules/nf-core/gawk/environment.yml @@ -1,7 +1,7 @@ -name: fastp +name: gawk channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::fastp=0.23.4 + - conda-forge::gawk=5.3.0 diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf new file mode 100644 index 00000000..ca468929 --- /dev/null +++ b/modules/nf-core/gawk/main.nf @@ -0,0 +1,55 @@ +process GAWK { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : + 'biocontainers/gawk:5.3.0' }" + + input: + tuple val(meta), path(input) + path(program_file) + + output: + tuple val(meta), path("${prefix}.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" + + program = program_file ? "-f ${program_file}" : "${args2}" + + """ + awk \\ + ${args} \\ + ${program} \\ + ${input} \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" + def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch" + + """ + ${create_cmd} ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml new file mode 100644 index 00000000..2b6033b0 --- /dev/null +++ b/modules/nf-core/gawk/meta.yml @@ -0,0 +1,50 @@ +name: "gawk" +description: | + If you are like many computer users, you would frequently like to make changes in various text files + wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest. + The job is easy with awk, especially the GNU implementation gawk. 
+keywords: + - gawk + - awk + - txt + - text + - file parsing +tools: + - "gawk": + description: "GNU awk" + homepage: "https://www.gnu.org/software/gawk/" + documentation: "https://www.gnu.org/software/gawk/manual/" + tool_dev_url: "https://www.gnu.org/prep/ftp.html" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: The input file - Specify the logic that needs to be executed on this file on the `ext.args2` or in the program file + pattern: "*" + - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't wish to use a file, you can use `ext.args2` to specify the logic. + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: The output file - specify the name of this file using `ext.prefix` and the extension using `ext.suffix` + pattern: "*" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/gawk/tests/main.nf.test b/modules/nf-core/gawk/tests/main.nf.test new file mode 100644 index 00000000..fce82ca9 --- /dev/null +++ b/modules/nf-core/gawk/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process GAWK" + script "../main.nf" + process "GAWK" + + tag "modules" + tag "modules_nfcore" + tag "gawk" + + test("convert fasta to bed") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("convert 
fasta to bed with program file") { + config "./nextflow_with_program_file.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = Channel.of('BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}').collectFile(name:"program.txt") + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/main.nf.test.snap b/modules/nf-core/gawk/tests/main.nf.test.snap new file mode 100644 index 00000000..4f3a759c --- /dev/null +++ b/modules/nf-core/gawk/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "convert fasta to bed with program file": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-17T15:20:02.495430346" + }, + "convert fasta to bed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-17T15:19:53.291809648" + } +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/nextflow.config b/modules/nf-core/gawk/tests/nextflow.config new file mode 100644 index 00000000..6e5d43a3 --- /dev/null +++ 
b/modules/nf-core/gawk/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: GAWK { + ext.suffix = "bed" + ext.args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' + } +} diff --git a/modules/nf-core/gawk/tests/nextflow_with_program_file.config b/modules/nf-core/gawk/tests/nextflow_with_program_file.config new file mode 100644 index 00000000..693ad419 --- /dev/null +++ b/modules/nf-core/gawk/tests/nextflow_with_program_file.config @@ -0,0 +1,5 @@ +process { + withName: GAWK { + ext.suffix = "bed" + } +} diff --git a/modules/nf-core/gawk/tests/tags.yml b/modules/nf-core/gawk/tests/tags.yml new file mode 100644 index 00000000..72e4531d --- /dev/null +++ b/modules/nf-core/gawk/tests/tags.yml @@ -0,0 +1,2 @@ +gawk: + - "modules/nf-core/gawk/**" diff --git a/modules/nf-core/genmod/annotate/environment.yml b/modules/nf-core/genmod/annotate/environment.yml new file mode 100644 index 00000000..ac8140fd --- /dev/null +++ b/modules/nf-core/genmod/annotate/environment.yml @@ -0,0 +1,7 @@ +name: genmod_annotate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::genmod=3.8.2 diff --git a/modules/nf-core/genmod/annotate/main.nf b/modules/nf-core/genmod/annotate/main.nf new file mode 100644 index 00000000..eb161187 --- /dev/null +++ b/modules/nf-core/genmod/annotate/main.nf @@ -0,0 +1,46 @@ +process GENMOD_ANNOTATE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0': + 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }" + + input: + tuple val(meta), path(input_vcf) + + output: + tuple val(meta), path("*_annotate.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + genmod \\ + annotate \\ + $args \\ + --outfile ${prefix}_annotate.vcf \\ + $input_vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_annotate.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/genmod/annotate/meta.yml b/modules/nf-core/genmod/annotate/meta.yml new file mode 100644 index 00000000..3c876ac5 --- /dev/null +++ b/modules/nf-core/genmod/annotate/meta.yml @@ -0,0 +1,41 @@ +name: "genmod_annotate" +description: for annotating regions, frequencies, cadd scores +keywords: + - annotate + - genmod + - ranking +tools: + - "genmod": + description: "Annotate genetic inheritance models in variant files" + homepage: "https://github.com/Clinical-Genomics/genmod" + documentation: "https://github.com/Clinical-Genomics/genmod" + tool_dev_url: "https://github.com/moonso" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_vcf: + type: file + description: VCF file + pattern: "*.{vcf}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Annotated VCF file + pattern: "*.{vcf}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/genmod/annotate/tests/main.nf.test b/modules/nf-core/genmod/annotate/tests/main.nf.test new file mode 100644 index 00000000..d17ebc9e --- /dev/null +++ b/modules/nf-core/genmod/annotate/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process GENMOD_ANNOTATE" + script "modules/nf-core/genmod/annotate/main.nf" + process "GENMOD_ANNOTATE" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "genmod" + tag "genmod/annotate" + + test("genmod_annotate") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/genmod.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} + ) + } + + } + +} diff --git a/modules/nf-core/genmod/annotate/tests/nextflow.config b/modules/nf-core/genmod/annotate/tests/nextflow.config new file mode 100644 index 00000000..a1860460 --- /dev/null +++ b/modules/nf-core/genmod/annotate/tests/nextflow.config @@ -0,0 +1,5 @@ +process{ + withName: GENMOD_ANNOTATE { + ext.args = " --annotate_regions " + } +} diff --git a/modules/nf-core/genmod/annotate/tests/tags.yml b/modules/nf-core/genmod/annotate/tests/tags.yml new file mode 100644 index 00000000..88bb35a1 --- /dev/null +++ b/modules/nf-core/genmod/annotate/tests/tags.yml @@ -0,0 +1,2 @@ +genmod/annotate: + - modules/nf-core/genmod/annotate/** diff --git a/modules/nf-core/genmod/compound/environment.yml b/modules/nf-core/genmod/compound/environment.yml new file mode 100644 index 00000000..1e2561fd --- /dev/null +++ 
b/modules/nf-core/genmod/compound/environment.yml @@ -0,0 +1,7 @@ +name: genmod_compound +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::genmod=3.8.2 diff --git a/modules/nf-core/genmod/compound/main.nf b/modules/nf-core/genmod/compound/main.nf new file mode 100644 index 00000000..1731b722 --- /dev/null +++ b/modules/nf-core/genmod/compound/main.nf @@ -0,0 +1,47 @@ +process GENMOD_COMPOUND { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0': + 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }" + + input: + tuple val(meta), path(input_vcf) + + output: + tuple val(meta), path("*_compound.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + genmod \\ + compound \\ + $args \\ + --processes ${task.cpus} \\ + --outfile ${prefix}_compound.vcf \\ + $input_vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_compound.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/genmod/compound/meta.yml b/modules/nf-core/genmod/compound/meta.yml new file mode 100644 index 00000000..aa5f7da5 --- /dev/null +++ b/modules/nf-core/genmod/compound/meta.yml @@ -0,0 +1,41 @@ +name: "genmod_compound" +description: Score compounds +keywords: + - compound + - genmod + - ranking +tools: + - "genmod": + description: "Annotate genetic inheritance models in variant files" + 
homepage: "https://github.com/Clinical-Genomics/genmod" + documentation: "https://github.com/Clinical-Genomics/genmod" + tool_dev_url: "https://github.com/moonso" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_vcf: + type: file + description: VCF file + pattern: "*.{vcf}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Output VCF file + pattern: "*.{vcf}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/genmod/compound/tests/main.nf.test b/modules/nf-core/genmod/compound/tests/main.nf.test new file mode 100644 index 00000000..43ed9f0d --- /dev/null +++ b/modules/nf-core/genmod/compound/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process GENMOD_COMPOUND" + script "modules/nf-core/genmod/compound/main.nf" + process "GENMOD_COMPOUND" + tag "modules" + tag "modules_nfcore" + tag "genmod" + tag "genmod/compound" + + test("genmod_compound") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/genmod_compound.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} + ) + } + + } + +} diff --git a/modules/nf-core/genmod/compound/tests/tags.yml b/modules/nf-core/genmod/compound/tests/tags.yml new file mode 100644 index 00000000..870b35e8 --- /dev/null +++ b/modules/nf-core/genmod/compound/tests/tags.yml @@ -0,0 +1,2 @@ +genmod/compound: + - modules/nf-core/genmod/compound/** diff --git a/modules/nf-core/genmod/models/environment.yml 
b/modules/nf-core/genmod/models/environment.yml new file mode 100644 index 00000000..62a746c7 --- /dev/null +++ b/modules/nf-core/genmod/models/environment.yml @@ -0,0 +1,7 @@ +name: genmod_models +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::genmod=3.8.2 diff --git a/modules/nf-core/genmod/models/main.nf b/modules/nf-core/genmod/models/main.nf new file mode 100644 index 00000000..0504574a --- /dev/null +++ b/modules/nf-core/genmod/models/main.nf @@ -0,0 +1,53 @@ +process GENMOD_MODELS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0': + 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }" + + input: + tuple val(meta), path(input_vcf) + path (fam) + path (reduced_penetrance) + + output: + tuple val(meta), path("*_models.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def family_file = fam ? "--family_file ${fam}" : "" + def pen_file = reduced_penetrance ? 
"--reduced_penetrance ${reduced_penetrance}" : "" + """ + genmod \\ + models \\ + $args \\ + $pen_file \\ + $family_file \\ + --processes ${task.cpus} \\ + --outfile ${prefix}_models.vcf \\ + $input_vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_models.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/genmod/models/meta.yml b/modules/nf-core/genmod/models/meta.yml new file mode 100644 index 00000000..dd9001e0 --- /dev/null +++ b/modules/nf-core/genmod/models/meta.yml @@ -0,0 +1,49 @@ +name: "genmod_models" +description: annotate models of inheritance +keywords: + - models + - genmod + - ranking +tools: + - "genmod": + description: "Annotate genetic inheritance models in variant files" + homepage: "https://github.com/Clinical-Genomics/genmod" + documentation: "https://github.com/Clinical-Genomics/genmod" + tool_dev_url: "https://github.com/moonso" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_vcf: + type: file + description: vcf file + pattern: "*.{vcf}" + - reduced_penetrance: + type: file + description: file with gene ids that have reduced penetrance + pattern: "*.{tsv}" + - family_file: + type: file + description: ped file + pattern: "*.{ped}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Output VCF file + pattern: "*.{vcf}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/genmod/models/tests/main.nf.test b/modules/nf-core/genmod/models/tests/main.nf.test new file mode 100644 index 00000000..9eba8dd5 --- /dev/null +++ b/modules/nf-core/genmod/models/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process GENMOD_MODELS" + script "modules/nf-core/genmod/models/main.nf" + process "GENMOD_MODELS" + tag "modules" + tag "modules_nfcore" + tag "genmod" + tag "genmod/models" + + test("genmod_models") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/test_annotate.vcf.gz', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/ped/justhusky.ped', checkIfExists: true) + input[2] = [] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} + ) + } + + } + +} diff --git a/modules/nf-core/genmod/models/tests/tags.yml b/modules/nf-core/genmod/models/tests/tags.yml new file mode 100644 index 00000000..72b3b6bb --- /dev/null +++ b/modules/nf-core/genmod/models/tests/tags.yml @@ -0,0 +1,2 @@ +genmod/models: + - modules/nf-core/genmod/models/** diff --git a/modules/nf-core/genmod/score/environment.yml b/modules/nf-core/genmod/score/environment.yml new file mode 100644 index 00000000..bacc4cec --- /dev/null +++ b/modules/nf-core/genmod/score/environment.yml @@ -0,0 +1,7 @@ +name: genmod_score +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::genmod=3.8.2 diff --git a/modules/nf-core/genmod/score/main.nf 
b/modules/nf-core/genmod/score/main.nf new file mode 100644 index 00000000..15be5f74 --- /dev/null +++ b/modules/nf-core/genmod/score/main.nf @@ -0,0 +1,52 @@ +process GENMOD_SCORE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0': + 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }" + + input: + tuple val(meta), path(input_vcf) + path (fam) + path (score_config) + + output: + tuple val(meta), path("*_score.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def family_file = fam ? "--family_file ${fam}" : "" + def config_file = score_config ? "--score_config ${score_config}" : "" + """ + genmod \\ + score \\ + $args \\ + $family_file \\ + $config_file \\ + --outfile ${prefix}_score.vcf \\ + $input_vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_score.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/genmod/score/meta.yml b/modules/nf-core/genmod/score/meta.yml new file mode 100644 index 00000000..8998b00c --- /dev/null +++ b/modules/nf-core/genmod/score/meta.yml @@ -0,0 +1,49 @@ +name: "genmod_score" +description: Score the variants of a vcf based on their annotation +keywords: + - score + - ranking + - genmod +tools: + - "genmod": + description: "Annotate genetic inheritance models in variant files" + homepage: "https://github.com/Clinical-Genomics/genmod" + documentation: 
"https://github.com/Clinical-Genomics/genmod" + tool_dev_url: "https://github.com/moonso" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_vcf: + type: file + description: vcf file + pattern: "*.{vcf}" + - family_file: + type: file + description: ped file + pattern: "*.{ped}" + - score_config: + type: file + description: rank model config file + pattern: "*.{ini}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Output VCF file + pattern: "*.{vcf}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/genmod/score/tests/main.nf.test b/modules/nf-core/genmod/score/tests/main.nf.test new file mode 100644 index 00000000..030ef6ba --- /dev/null +++ b/modules/nf-core/genmod/score/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process GENMOD_SCORE" + script "modules/nf-core/genmod/score/main.nf" + process "GENMOD_SCORE" + tag "modules" + tag "modules_nfcore" + tag "genmod" + tag "genmod/score" + + test("genmod_score") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/test_models.vcf.gz', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/ped/justhusky.ped', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/genmod/svrank_model_-v1.8-.ini', checkIfExists: true) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} + ) + } + + } + +} diff --git 
a/modules/nf-core/genmod/score/tests/tags.yml b/modules/nf-core/genmod/score/tests/tags.yml new file mode 100644 index 00000000..cef831ed --- /dev/null +++ b/modules/nf-core/genmod/score/tests/tags.yml @@ -0,0 +1,2 @@ +genmod/score: + - modules/nf-core/genmod/score/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml index 25910b34..dfc02a7b 100644 --- a/modules/nf-core/gunzip/environment.yml +++ b/modules/nf-core/gunzip/environment.yml @@ -4,4 +4,6 @@ channels: - bioconda - defaults dependencies: - - conda-forge::sed=4.7 + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 468a6f28..5e67e3b9 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -4,8 +4,8 @@ process GUNZIP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" input: tuple val(meta), path(archive) @@ -18,8 +18,11 @@ process GUNZIP { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ # Not calling gunzip itself because it creates files # with the original group ownership rather than the @@ -37,7 +40,11 @@ process GUNZIP { """ stub: - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ touch $gunzip cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 231034f2..f32973a0 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -37,3 +37,4 @@ maintainers: - "@joseespinosa" - "@drpatelh" - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test index 6406008e..776211ad 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -33,4 +33,89 @@ nextflow_process { } + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + } diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap index 720fd9ff..069967e7 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test.snap +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -1,4 +1,70 @@ { + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + 
"gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, "Should run without failures": { "content": [ { @@ -26,6 +92,43 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" } } \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 00000000..dec77642 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/hifiasm/hifiasm.diff b/modules/nf-core/hifiasm/hifiasm.diff index 650cf697..03790816 100644 --- a/modules/nf-core/hifiasm/hifiasm.diff +++ b/modules/nf-core/hifiasm/hifiasm.diff @@ -8,7 +8,7 @@ Changes in module 'nf-core/hifiasm' - tuple val(meta), path(reads) - path paternal_kmer_dump - path maternal_kmer_dump -+ tuple val(meta), path(reads), path(paternal_kmer_dump), path(maternal_kmer_dump) ++ tuple val(meta), path(reads, stageAs: "?/*"), path(paternal_kmer_dump), path(maternal_kmer_dump) path hic_read1 path hic_read2 diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf index 
040d8369..a1bf30a0 100644 --- a/modules/nf-core/hifiasm/main.nf +++ b/modules/nf-core/hifiasm/main.nf @@ -8,7 +8,7 @@ process HIFIASM { 'biocontainers/hifiasm:0.19.8--h43eeafb_0' }" input: - tuple val(meta), path(reads), path(paternal_kmer_dump), path(maternal_kmer_dump) + tuple val(meta), path(reads, stageAs: "?/*"), path(paternal_kmer_dump), path(maternal_kmer_dump) path hic_read1 path hic_read2 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 66745efc..d82dc14d 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -28,17 +28,25 @@ process MINIMAP2_ALIGN { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def query = bam_input ? "-" : reads + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + """ + $samtools_reset_fastq \\ minimap2 \\ $args \\ -t $task.cpus \\ - ${reference ?: reads} \\ - $reads \\ + $target \\ + $query \\ $cigar_paf \\ $set_cigar_bam \\ $bam_output @@ -55,6 +63,9 @@ process MINIMAP2_ALIGN { def prefix = task.ext.prefix ?: "${meta.id}" def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" def bam_index = bam_index_extension ? 
"touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + """ touch $output_file ${bam_index} diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test index c93f4364..4072c171 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -34,7 +34,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), process.out.versions ).match() } ) @@ -67,7 +68,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), file(process.out.index[0][1]).name, process.out.versions ).match() } @@ -104,7 +106,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), process.out.versions ).match() } ) @@ -137,7 +140,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), process.out.versions ).match() } ) @@ -145,6 +149,104 @@ nextflow_process { } + test("sarscov2 - bam, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + test("sarscov2 - fastq, fasta, true, [], false, false - stub") { options "-stub" @@ -241,4 +343,99 @@ nextflow_process { } + test("sarscov2 - bam, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + } \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index ecb3e4ee..12264a85 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ 
b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -1,4 +1,83 @@ { + "sarscov2 - bam, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:03:00.827260362" + }, + "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:37.92353539" + }, "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { "content": [ { @@ -154,7 +233,13 @@ }, "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { "content": [ - "test.bam", + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o 
test.bam" + ], + "1bc392244f228bf52cf0b5a8f6a654c9", [ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] @@ -163,11 +248,17 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - "timestamp": "2024-06-03T12:21:31.582849048" + "timestamp": "2024-07-23T11:18:18.964586894" }, "sarscov2 - fastq, fasta, true, [], false, false": { "content": [ - "test.bam", + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "f194745c0ccfcb2a9c0aee094a08750", [ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] @@ -176,11 +267,17 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - "timestamp": "2024-06-03T12:29:11.968586416" + "timestamp": "2024-07-23T11:17:48.667488325" }, "sarscov2 - fastq, fasta, true, 'bai', false, false": { "content": [ - "test.bam", + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "f194745c0ccfcb2a9c0aee094a08750", "test.bam.bai", [ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" @@ -190,11 +287,182 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - "timestamp": "2024-06-03T12:21:20.362186362" + "timestamp": "2024-07-23T11:18:02.517416733" + }, + "sarscov2 - bam, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:02:49.64829488" + }, + "sarscov2 - bam, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:22.162291795" }, "sarscov2 - fastq, [], true, false, false": { "content": [ - "test.bam", + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:ERR5069949.2151832\tLN:150", + "@SQ\tSN:ERR5069949.576388\tLN:77", + "@SQ\tSN:ERR5069949.501486\tLN:146", + "@SQ\tSN:ERR5069949.1331889\tLN:132", + "@SQ\tSN:ERR5069949.2161340\tLN:80", + "@SQ\tSN:ERR5069949.973930\tLN:79", + "@SQ\tSN:ERR5069949.2417063\tLN:150", + "@SQ\tSN:ERR5069949.376959\tLN:151", + "@SQ\tSN:ERR5069949.1088785\tLN:149", + "@SQ\tSN:ERR5069949.1066259\tLN:147", + "@SQ\tSN:ERR5069949.2832676\tLN:139", + "@SQ\tSN:ERR5069949.2953930\tLN:151", + "@SQ\tSN:ERR5069949.324865\tLN:151", + "@SQ\tSN:ERR5069949.2185111\tLN:150", + "@SQ\tSN:ERR5069949.937422\tLN:151", + "@SQ\tSN:ERR5069949.2431709\tLN:150", + "@SQ\tSN:ERR5069949.1246538\tLN:148", + "@SQ\tSN:ERR5069949.1189252\tLN:98", + "@SQ\tSN:ERR5069949.2216307\tLN:147", + "@SQ\tSN:ERR5069949.3273002\tLN:148", + "@SQ\tSN:ERR5069949.3277445\tLN:151", + "@SQ\tSN:ERR5069949.3022231\tLN:147", + "@SQ\tSN:ERR5069949.184542\tLN:151", + "@SQ\tSN:ERR5069949.540529\tLN:149", + "@SQ\tSN:ERR5069949.686090\tLN:150", + "@SQ\tSN:ERR5069949.2787556\tLN:106", + "@SQ\tSN:ERR5069949.2650879\tLN:150", + "@SQ\tSN:ERR5069949.2064910\tLN:149", + "@SQ\tSN:ERR5069949.2328704\tLN:150", + 
"@SQ\tSN:ERR5069949.1067032\tLN:150", + "@SQ\tSN:ERR5069949.3338256\tLN:151", + "@SQ\tSN:ERR5069949.1412839\tLN:147", + "@SQ\tSN:ERR5069949.1538968\tLN:150", + "@SQ\tSN:ERR5069949.147998\tLN:94", + "@SQ\tSN:ERR5069949.366975\tLN:106", + "@SQ\tSN:ERR5069949.1372331\tLN:151", + "@SQ\tSN:ERR5069949.1709367\tLN:129", + "@SQ\tSN:ERR5069949.2388984\tLN:150", + "@SQ\tSN:ERR5069949.1132353\tLN:150", + "@SQ\tSN:ERR5069949.1151736\tLN:151", + "@SQ\tSN:ERR5069949.479807\tLN:150", + "@SQ\tSN:ERR5069949.2176303\tLN:151", + "@SQ\tSN:ERR5069949.2772897\tLN:151", + "@SQ\tSN:ERR5069949.1020777\tLN:122", + "@SQ\tSN:ERR5069949.465452\tLN:151", + "@SQ\tSN:ERR5069949.1704586\tLN:149", + "@SQ\tSN:ERR5069949.1258508\tLN:151", + "@SQ\tSN:ERR5069949.986441\tLN:119", + "@SQ\tSN:ERR5069949.2674295\tLN:148", + "@SQ\tSN:ERR5069949.885966\tLN:79", + "@SQ\tSN:ERR5069949.2342766\tLN:151", + "@SQ\tSN:ERR5069949.3122970\tLN:127", + "@SQ\tSN:ERR5069949.3279513\tLN:72", + "@SQ\tSN:ERR5069949.309410\tLN:151", + "@SQ\tSN:ERR5069949.532979\tLN:149", + "@SQ\tSN:ERR5069949.2888794\tLN:151", + "@SQ\tSN:ERR5069949.2205229\tLN:150", + "@SQ\tSN:ERR5069949.786562\tLN:151", + "@SQ\tSN:ERR5069949.919671\tLN:151", + "@SQ\tSN:ERR5069949.1328186\tLN:151", + "@SQ\tSN:ERR5069949.870926\tLN:149", + "@SQ\tSN:ERR5069949.2257580\tLN:151", + "@SQ\tSN:ERR5069949.3249622\tLN:77", + "@SQ\tSN:ERR5069949.611123\tLN:125", + "@SQ\tSN:ERR5069949.651338\tLN:142", + "@SQ\tSN:ERR5069949.169513\tLN:92", + "@SQ\tSN:ERR5069949.155944\tLN:150", + "@SQ\tSN:ERR5069949.2033605\tLN:150", + "@SQ\tSN:ERR5069949.2730382\tLN:142", + "@SQ\tSN:ERR5069949.2125592\tLN:150", + "@SQ\tSN:ERR5069949.1062611\tLN:151", + "@SQ\tSN:ERR5069949.1778133\tLN:151", + "@SQ\tSN:ERR5069949.3057020\tLN:95", + "@SQ\tSN:ERR5069949.2972968\tLN:141", + "@SQ\tSN:ERR5069949.2734474\tLN:149", + "@SQ\tSN:ERR5069949.856527\tLN:151", + "@SQ\tSN:ERR5069949.2098070\tLN:151", + "@SQ\tSN:ERR5069949.1552198\tLN:150", + "@SQ\tSN:ERR5069949.2385514\tLN:150", + 
"@SQ\tSN:ERR5069949.2270078\tLN:151", + "@SQ\tSN:ERR5069949.114870\tLN:150", + "@SQ\tSN:ERR5069949.2668880\tLN:147", + "@SQ\tSN:ERR5069949.257821\tLN:139", + "@SQ\tSN:ERR5069949.2243023\tLN:150", + "@SQ\tSN:ERR5069949.2605155\tLN:146", + "@SQ\tSN:ERR5069949.1340552\tLN:151", + "@SQ\tSN:ERR5069949.1561137\tLN:150", + "@SQ\tSN:ERR5069949.2361683\tLN:149", + "@SQ\tSN:ERR5069949.2521353\tLN:150", + "@SQ\tSN:ERR5069949.1261808\tLN:149", + "@SQ\tSN:ERR5069949.2734873\tLN:98", + "@SQ\tSN:ERR5069949.3017828\tLN:107", + "@SQ\tSN:ERR5069949.573706\tLN:150", + "@SQ\tSN:ERR5069949.1980512\tLN:151", + "@SQ\tSN:ERR5069949.1014693\tLN:150", + "@SQ\tSN:ERR5069949.3184655\tLN:150", + "@SQ\tSN:ERR5069949.29668\tLN:89", + "@SQ\tSN:ERR5069949.3258358\tLN:151", + "@SQ\tSN:ERR5069949.1476386\tLN:151", + "@SQ\tSN:ERR5069949.2415814\tLN:150", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "16c1c651f8ec67383bcdee3c55aed94f", [ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] @@ -203,6 +471,6 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - "timestamp": "2024-06-03T12:21:37.233709954" + "timestamp": "2024-07-23T11:18:34.246998277" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/fastq/environment.yml b/modules/nf-core/samtools/fastq/environment.yml index 1d1bc98f..4455904e 100644 --- a/modules/nf-core/samtools/fastq/environment.yml +++ b/modules/nf-core/samtools/fastq/environment.yml @@ -1,10 +1,8 @@ name: samtools_fastq - channels: - conda-forge - bioconda - defaults - dependencies: - - bioconda::htslib=1.20 - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf index 66192d2a..6796c02b 100644 --- a/modules/nf-core/samtools/fastq/main.nf +++ b/modules/nf-core/samtools/fastq/main.nf @@ -23,21 +23,17 @@ process SAMTOOLS_FASTQ 
{ script: def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq" : meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" : "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz" """ - samtools reset \\ - --threads ${task.cpus-1} \\ + samtools \\ + fastq \\ $args \\ - $input \\ - | \\ - samtools fastq \\ - $args2 \\ --threads ${task.cpus-1} \\ -0 ${prefix}_other.fastq.gz \\ + $input \\ $output cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/samtools/fastq/samtools-fastq.diff b/modules/nf-core/samtools/fastq/samtools-fastq.diff deleted file mode 100644 index 000ce76a..00000000 --- a/modules/nf-core/samtools/fastq/samtools-fastq.diff +++ /dev/null @@ -1,45 +0,0 @@ -Changes in module 'nf-core/samtools/fastq' ---- modules/nf-core/samtools/fastq/main.nf -+++ modules/nf-core/samtools/fastq/main.nf -@@ -23,17 +23,21 @@ - - script: - def args = task.ext.args ?: '' -+ def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq" : - meta.single_end ? 
"-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" : - "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz" - """ -- samtools \\ -- fastq \\ -+ samtools reset \\ -+ --threads ${task.cpus-1} \\ - $args \\ -+ $input \\ -+ | \\ -+ samtools fastq \\ -+ $args2 \\ - --threads ${task.cpus-1} \\ - -0 ${prefix}_other.fastq.gz \\ -- $input \\ - $output - - cat <<-END_VERSIONS > versions.yml - ---- modules/nf-core/samtools/fastq/environment.yml -+++ modules/nf-core/samtools/fastq/environment.yml -@@ -1,8 +1,10 @@ - name: samtools_fastq -+ - channels: - - conda-forge - - bioconda - - defaults -+ - dependencies: -+ - bioconda::htslib=1.20 - - bioconda::samtools=1.20 -- - bioconda::htslib=1.20 - -************************************************************ diff --git a/modules/nf-core/samtools/import/environment.yml b/modules/nf-core/samtools/import/environment.yml new file mode 100644 index 00000000..38f1ebab --- /dev/null +++ b/modules/nf-core/samtools/import/environment.yml @@ -0,0 +1,8 @@ +name: samtools_import +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/import/main.nf b/modules/nf-core/samtools/import/main.nf new file mode 100644 index 00000000..954a8052 --- /dev/null +++ b/modules/nf-core/samtools/import/main.nf @@ -0,0 +1,60 @@ +process SAMTOOLS_IMPORT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0': + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.sam") , emit: sam, optional: true + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def input = reads instanceof List && meta.single_end ? reads.join(" -0") : // multiple single-end files + reads instanceof List && !meta.single_end ? "-1 ${reads[0]} -2 ${reads[1]}": // paired end file + meta.single_end ? "-0 $reads" : // single single-end file + !meta.single_end ? "-s $reads": // interleave paired-end file + reads // if all else fails, just add the reads without flags + """ + samtools \\ + import \\ + $input \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/import/meta.yml b/modules/nf-core/samtools/import/meta.yml new file mode 100644 index 00000000..9002e092 --- /dev/null +++ b/modules/nf-core/samtools/import/meta.yml @@ -0,0 +1,56 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json 
+name: "samtools_import" +description: converts FASTQ files to unmapped SAM/BAM/CRAM +keywords: + - import + - fastq + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - reads: + type: file + description: fastq data to be converted to SAM/BAM/CRAM + pattern: "*.{fastq,fq,fastq.gz,fq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - sam: + type: file + description: SAM file + pattern: "*.sam" + - bam: + type: file + description: Unaligned BAM file + pattern: "*.bam" + - cram: + type: file + description: Unaligned CRAM file + pattern: "*.cram" +authors: + - "@matthdsm" +maintainers: + - "@matthdsm" diff --git a/modules/nf-core/samtools/import/tests/main.nf.test b/modules/nf-core/samtools/import/tests/main.nf.test new file mode 100644 index 00000000..d029ca70 --- /dev/null +++ b/modules/nf-core/samtools/import/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IMPORT" + script "../main.nf" + process "SAMTOOLS_IMPORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/import" + + test("samtools_import_single ") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out.bam.collect { it.collect { it instanceof Map ? it : file(it).name } }).match()} + ) + } + } + + test("samtools_import_paired ") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out.bam.collect { it.collect { it instanceof Map ? it : file(it).name } }).match()} + ) + } + } + + test("samtools_import_interleaved") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out.bam.collect { it.collect { it instanceof Map ? 
it : file(it).name } }).match()} + ) + } + } +} diff --git a/modules/nf-core/samtools/import/tests/main.nf.test.snap b/modules/nf-core/samtools/import/tests/main.nf.test.snap new file mode 100644 index 00000000..eb730a06 --- /dev/null +++ b/modules/nf-core/samtools/import/tests/main.nf.test.snap @@ -0,0 +1,103 @@ +{ + "samtools_import_single ": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T11:38:44.388259606" + }, + "samtools_import_interleaved": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T11:38:56.393371331" + }, + "samtools_import_paired ": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T11:38:50.437197406" + }, + "samtools_import_interleaved ": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,fad91b070f51c77d7abe22cd31243710" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,a529fc2aa6485db14986c95c53638b11" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,fad91b070f51c77d7abe22cd31243710" + ] + ], + "cram": [ + + ], + "sam": [ + + ], + "versions": [ + "versions.yml:md5,a529fc2aa6485db14986c95c53638b11" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-30T12:12:43.491200967" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/import/tests/tags.yml b/modules/nf-core/samtools/import/tests/tags.yml new file mode 100644 index 00000000..89c89128 --- /dev/null +++ b/modules/nf-core/samtools/import/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/import: + - 
modules/nf-core/samtools/import/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 369b58a2..260d516b 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -1,10 +1,8 @@ name: samtools_index - channels: - conda-forge - bioconda - defaults - dependencies: - - bioconda::htslib=1.20 - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index b523c21b..e002585b 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -35,10 +35,11 @@ process SAMTOOLS_INDEX { """ stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" """ - touch ${input}.bai - touch ${input}.crai - touch ${input}.csi + touch ${input}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index bb7756d1..ca34fb5c 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -9,11 +9,7 @@ nextflow_process { tag "samtools/index" test("bai") { - when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -27,18 +23,13 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.bai).match("bai") }, - { assert snapshot(process.out.versions).match("bai_versions") } + { assert snapshot(process.out).match() } ) } } test("crai") { - when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -52,20 +43,83 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.crai).match("crai") }, - { assert snapshot(process.out.versions).match("crai_versions") } 
+ { assert snapshot(process.out).match() } ) } } test("csi") { - config "./csi.nextflow.config" when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { process { """ input[0] = Channel.of([ @@ -79,8 +133,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert path(process.out.csi.get(0).get(1)).exists() }, - { assert snapshot(process.out.versions).match("csi_versions") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap index 52756e85..799d199c 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ 
b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,74 +1,250 @@ { - "crai_versions": { + "csi - stub": { "content": [ - [ - "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" - ] + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-05-28T15:42:04.203740976" + "timestamp": "2024-07-22T16:51:53.9057" }, - "csi_versions": { + "crai - stub": { "content": [ - [ - "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" - ] + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-05-28T15:42:09.57475878" + "timestamp": "2024-07-22T16:51:45.931558" }, - "crai": { + "bai - stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" ] - ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-02-12T18:41:38.446424" + "timestamp": "2024-07-22T16:51:34.807525" }, - "bai": { + "csi": { "content": [ + "test.paired_end.sorted.bam.csi", [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" - ] + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-02-12T18:40:46.579747" + "timestamp": "2024-07-22T16:52:55.688799" }, - "bai_versions": { + "crai": { "content": [ - [ - "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" - ] + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:51:17.609533" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-05-28T15:41:57.929287369" + "timestamp": "2024-07-22T16:51:04.16585" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml index af1f2cf6..36a12eab 100644 --- a/modules/nf-core/samtools/sort/environment.yml +++ b/modules/nf-core/samtools/sort/environment.yml @@ -1,10 +1,8 @@ name: samtools_sort - channels: - conda-forge - bioconda - defaults - dependencies: - - bioconda::htslib=1.20 - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 596c6f7e..8e019099 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -50,10 +50,20 @@ process SAMTOOLS_SORT { """ stub: + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? 
"cram" : + "bam" """ - touch ${prefix}.bam - touch ${prefix}.bam.csi + touch ${prefix}.${extension} + if [ "${extension}" == "bam" ]; + then + touch ${prefix}.${extension}.csi + elif [ "${extension}" == "cram" ]; + then + touch ${prefix}.${extension}.crai + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test index fb38ed9b..c2ea9c72 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -32,16 +32,16 @@ nextflow_process { { assert process.success }, { assert snapshot( process.out.bam, - process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } - ).match("test_bam") - } + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match()} ) } } test("cram") { - config "./nextflow.config" + config "./nextflow_cram.config" when { process { @@ -62,23 +62,20 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - process.out.bam, - process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } - ).match("test_cram") - } + process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.crai.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match()} ) } } - test("bam_stub") { + test("bam - stub") { - config "./nextflow.config" options "-stub" + config "./nextflow.config" when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -96,8 +93,35 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, - { assert snapshot(process.out.versions).match("bam_stub_versions") } + { assert snapshot(process.out).match() } + ) + } + } + + test("cram - stub") { + + options "-stub" + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap index 5a27de1d..da38d5d1 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -7,54 +7,159 @@ "id": "test", "single_end": false }, - "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" + "test.sorted.cram" ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai" + ] + ], + [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-05-31T08:13:54.512837189" + "timestamp": "2024-07-22T17:19:37.196205" }, - "bam_stub_bam": { + "bam - stub": { "content": [ - 
"test.sorted.bam" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-05-31T07:29:00.761845507" + "timestamp": "2024-07-22T15:54:46.580756" }, - "test_cram": { + "cram - stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,22b2093be34a7637f5fbc84272b89d06" - ] - ], - [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam.csi" + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" ] - ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": 
"24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-05-31T09:16:51.924951855" + "timestamp": "2024-07-22T15:57:30.505698" }, - "test_bam": { + "bam": { "content": [ [ [ @@ -73,42 +178,15 @@ }, "test.sorted.bam.csi" ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-05-31T08:28:12.15952312" - }, - "bam_stub_versions": { - "content": [ + ], [ "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-05-31T07:29:00.765038811" - }, - "bam": { - "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-05-31T08:13:48.538030517" + "timestamp": "2024-07-22T15:54:25.872954" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/modules/nf-core/samtools/sort/tests/nextflow_cram.config new file mode 100644 index 00000000..3a8c0188 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow_cram.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index --output-fmt cram" + } + +} diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml index 38185dd3..150c3777 100644 --- a/modules/nf-core/samtools/view/environment.yml +++ b/modules/nf-core/samtools/view/environment.yml @@ -1,10 +1,8 @@ name: samtools_view - channels: - conda-forge - bioconda - defaults - dependencies: - - bioconda::htslib=1.20 - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index 38df8576..dc611448 100644 --- a/modules/nf-core/samtools/view/main.nf +++ 
b/modules/nf-core/samtools/view/main.nf @@ -13,13 +13,15 @@ process SAMTOOLS_VIEW { path qname output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.sam"), emit: sam, optional: true - tuple val(meta), path("*.bai"), emit: bai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crai}"), emit: unselected_index, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -27,13 +29,13 @@ process SAMTOOLS_VIEW { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" - def readnames = qname ? "--qname-file ${qname}": "" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - input.getExtension() + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + readnames = qname ? 
"--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ samtools \\ @@ -54,14 +56,14 @@ process SAMTOOLS_VIEW { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - input.getExtension() + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - def index = args.contains("--write-index") ? "touch ${prefix}.csi" : "" + def index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : "" """ touch ${prefix}.${file_type} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 3dadafae..27be60d0 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -73,6 +73,15 @@ output: type: file description: optional CRAM file index pattern: "*.{crai}" + # unselected and unselected_index are created when passing a qname + - unselected: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{bai,csi,crai}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test index 45a0defb..37b81a91 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test +++ 
b/modules/nf-core/samtools/view/tests/main.nf.test @@ -172,6 +172,8 @@ nextflow_process { { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, + { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } ) } diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap index eb0c577c..6bcce9fe 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -355,6 +355,26 @@ }, "timestamp": "2024-02-12T19:38:23.322874" }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, "bam_versions": { "content": [ [ @@ -477,7 +497,7 @@ }, "bam_stub_csi": { "content": [ - "test.csi" + "test.bam.csi" ], "meta": { "nf-test": "0.8.4", diff --git a/modules/nf-core/sniffles/main.nf b/modules/nf-core/sniffles/main.nf index 83f63586..41bc76c1 100644 --- a/modules/nf-core/sniffles/main.nf +++ b/modules/nf-core/sniffles/main.nf @@ -49,7 +49,6 @@ process SNIFFLES { stub: def prefix = task.ext.prefix ?: "${meta.id}" - """ touch ${prefix}.vcf.gz touch ${prefix}.snf @@ -59,6 +58,4 @@ process SNIFFLES { sniffles: \$(sniffles --help 2>&1 | grep Version |sed 's/^.*Version //') END_VERSIONS """ - } - diff 
--git a/modules/nf-core/sniffles/meta.yml b/modules/nf-core/sniffles/meta.yml index 333d5de1..c8b2da7d 100644 --- a/modules/nf-core/sniffles/meta.yml +++ b/modules/nf-core/sniffles/meta.yml @@ -3,7 +3,7 @@ description: structural-variant calling with sniffles keywords: - sniffles - structural-variant calling - - long-read sequencing + - long-read tools: - sniffles: description: a fast structural variant caller for long-read sequencing diff --git a/modules/nf-core/sniffles/sniffles.diff b/modules/nf-core/sniffles/sniffles.diff index 9b8461a9..9b748dd3 100644 --- a/modules/nf-core/sniffles/sniffles.diff +++ b/modules/nf-core/sniffles/sniffles.diff @@ -1,15 +1,7 @@ Changes in module 'nf-core/sniffles' --- modules/nf-core/sniffles/meta.yml +++ modules/nf-core/sniffles/meta.yml -@@ -3,6 +3,7 @@ - keywords: - - sniffles - - structural-variant calling -+ - long-read sequencing - tools: - - sniffles: - description: a fast structural variant caller for long-read sequencing -@@ -16,10 +17,10 @@ +@@ -17,10 +17,10 @@ description: | Groovy Map containing sample information e.g. 
[ id:'test' ] @@ -23,7 +15,7 @@ Changes in module 'nf-core/sniffles' - bai: type: file description: Index of BAM file -@@ -33,6 +34,15 @@ +@@ -34,6 +34,15 @@ type: file description: | Reference database in FASTA format @@ -39,7 +31,7 @@ Changes in module 'nf-core/sniffles' output: - meta: type: map -@@ -45,8 +55,8 @@ +@@ -46,8 +55,8 @@ pattern: "*.vcf.gz" - snf: type: file @@ -76,7 +68,7 @@ Changes in module 'nf-core/sniffles' when: task.ext.when == null || task.ext.when -@@ -23,18 +26,39 @@ +@@ -23,14 +26,33 @@ script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" @@ -99,26 +91,20 @@ Changes in module 'nf-core/sniffles' + $snf \\ $args + - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sniffles: \$(sniffles --help 2>&1 | grep Version |sed 's/^.*Version //') - END_VERSIONS - """ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ sniffles: \$(sniffles --help 2>&1 | grep Version |sed 's/^.*Version //') ++ END_VERSIONS ++ """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" -+ + """ + touch ${prefix}.vcf.gz + touch ${prefix}.snf + -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ sniffles: \$(sniffles --help 2>&1 | grep Version |sed 's/^.*Version //') -+ END_VERSIONS -+ """ -+ - } - + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sniffles: \$(sniffles --help 2>&1 | grep Version |sed 's/^.*Version //') ************************************************************ diff --git a/modules/nf-core/somalier/extract/environment.yml b/modules/nf-core/somalier/extract/environment.yml index e0dd11c9..fe32ebda 100644 --- a/modules/nf-core/somalier/extract/environment.yml +++ b/modules/nf-core/somalier/extract/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::somalier=0.2.15 + - bioconda::somalier=0.2.18 diff --git a/modules/nf-core/somalier/extract/main.nf b/modules/nf-core/somalier/extract/main.nf index 8dc3c31c..4a20f6b3 100644 --- 
a/modules/nf-core/somalier/extract/main.nf +++ b/modules/nf-core/somalier/extract/main.nf @@ -5,14 +5,14 @@ process SOMALIER_EXTRACT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/somalier:0.2.15--h37c5b7d_0': - 'biocontainers/somalier:0.2.15--h37c5b7d_0' }" + 'https://depot.galaxyproject.org/singularity/somalier:0.2.18--hb57907c_0': + 'biocontainers/somalier:0.2.18--hb57907c_0' }" input: tuple val(meta), path(input), path(input_index) - path(fasta) - path(fai) - path(sites) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(sites) output: tuple val(meta), path("*.somalier") , emit: extract diff --git a/modules/nf-core/somalier/extract/meta.yml b/modules/nf-core/somalier/extract/meta.yml index 7aa426ab..aabaf5d6 100644 --- a/modules/nf-core/somalier/extract/meta.yml +++ b/modules/nf-core/somalier/extract/meta.yml @@ -19,7 +19,7 @@ tools: documentation: "https://github.com/brentp/somalier/blob/master/README.md" tool_dev_url: "https://github.com/brentp/somalier" doi: "10.1186/s13073-020-00761-2" - licence: "MIT License" + licence: ["MIT"] input: - meta: type: map @@ -32,14 +32,29 @@ input: - input_index: type: file description: index file of the input data, e.g., bam.bai, cram.crai + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'hg38' ] - fasta: type: file description: The reference fasta file pattern: "*.{fasta,fna,fas,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'hg38' ] - fai: type: file description: FASTA index file pattern: "*.fai" + - meta4: + type: map + description: | + Groovy Map containing sites information + e.g. 
[ id:'hg38' ] - sites: type: file description: sites file in VCF format which can be taken from https://github.com/brentp/somalier diff --git a/modules/nf-core/somalier/extract/somalier-extract.diff b/modules/nf-core/somalier/extract/somalier-extract.diff new file mode 100644 index 00000000..59f472ab --- /dev/null +++ b/modules/nf-core/somalier/extract/somalier-extract.diff @@ -0,0 +1,25 @@ +Changes in module 'nf-core/somalier/extract' +--- modules/nf-core/somalier/extract/main.nf ++++ modules/nf-core/somalier/extract/main.nf +@@ -5,8 +5,8 @@ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +- 'https://depot.galaxyproject.org/singularity/somalier:0.2.19--h0c29559_0': +- 'biocontainers/somalier:0.2.19--h0c29559_0' }" ++ 'https://depot.galaxyproject.org/singularity/somalier:0.2.18--hb57907c_0': ++ 'biocontainers/somalier:0.2.18--hb57907c_0' }" + + input: + tuple val(meta), path(input), path(input_index) + +--- modules/nf-core/somalier/extract/environment.yml ++++ modules/nf-core/somalier/extract/environment.yml +@@ -4,4 +4,4 @@ + - bioconda + - defaults + dependencies: +- - bioconda::somalier=0.2.19 ++ - bioconda::somalier=0.2.18 + +************************************************************ diff --git a/modules/nf-core/somalier/extract/tests/main.nf.test b/modules/nf-core/somalier/extract/tests/main.nf.test new file mode 100644 index 00000000..dfc7e8a5 --- /dev/null +++ b/modules/nf-core/somalier/extract/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process SOMALIER_EXTRACT" + script "../main.nf" + process "SOMALIER_EXTRACT" + + tag "modules" + tag "modules_nfcore" + tag "somalier" + tag "somalier/extract" + + test("homo_sapiens - [ bam, bai ], fasta, fai, sites") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'delete_me/somalier/sites_chr21.hg38.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [ bam, bai ], fasta, fai, sites -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'delete_me/somalier/sites_chr21.hg38.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end 
of file diff --git a/modules/nf-core/somalier/extract/tests/main.nf.test.snap b/modules/nf-core/somalier/extract/tests/main.nf.test.snap new file mode 100644 index 00000000..83df72a4 --- /dev/null +++ b/modules/nf-core/somalier/extract/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "homo_sapiens - [ bam, bai ], fasta, fai, sites": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "normal.somalier:md5,21f0b980edd42ddaa8ab964959c1de02" + ] + ], + "1": [ + "versions.yml:md5,df4e49e9520f49a862026da1aa81dde0" + ], + "extract": [ + [ + { + "id": "test", + "single_end": false + }, + "normal.somalier:md5,21f0b980edd42ddaa8ab964959c1de02" + ] + ], + "versions": [ + "versions.yml:md5,df4e49e9520f49a862026da1aa81dde0" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-01T13:25:23.384476049" + }, + "homo_sapiens - [ bam, bai ], fasta, fai, sites -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.somalier:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,df4e49e9520f49a862026da1aa81dde0" + ], + "extract": [ + [ + { + "id": "test", + "single_end": false + }, + "test.somalier:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,df4e49e9520f49a862026da1aa81dde0" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-01T13:25:35.741711687" + } +} \ No newline at end of file diff --git a/modules/nf-core/somalier/extract/tests/tags.yml b/modules/nf-core/somalier/extract/tests/tags.yml new file mode 100644 index 00000000..836e3524 --- /dev/null +++ b/modules/nf-core/somalier/extract/tests/tags.yml @@ -0,0 +1,2 @@ +somalier/extract: + - "modules/nf-core/somalier/extract/**" diff --git a/modules/nf-core/somalier/relate/environment.yml b/modules/nf-core/somalier/relate/environment.yml index b8b70f67..3a730471 100644 --- 
a/modules/nf-core/somalier/relate/environment.yml +++ b/modules/nf-core/somalier/relate/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::somalier=0.2.15 + - bioconda::somalier=0.2.18 diff --git a/modules/nf-core/somalier/relate/main.nf b/modules/nf-core/somalier/relate/main.nf index 45a4c1e8..fb9d3742 100644 --- a/modules/nf-core/somalier/relate/main.nf +++ b/modules/nf-core/somalier/relate/main.nf @@ -5,8 +5,8 @@ process SOMALIER_RELATE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/somalier:0.2.15--h37c5b7d_0': - 'biocontainers/somalier:0.2.15--h37c5b7d_0' }" + 'https://depot.galaxyproject.org/singularity/somalier:0.2.18--hb57907c_0': + 'biocontainers/somalier:0.2.18--hb57907c_0' }" input: tuple val(meta), path(extract), path(ped) diff --git a/modules/nf-core/somalier/relate/meta.yml b/modules/nf-core/somalier/relate/meta.yml index d1d81c87..42638f4f 100644 --- a/modules/nf-core/somalier/relate/meta.yml +++ b/modules/nf-core/somalier/relate/meta.yml @@ -19,7 +19,7 @@ tools: documentation: "https://github.com/brentp/somalier/blob/master/README.md" tool_dev_url: "https://github.com/brentp/somalier" doi: "10.1186/s13073-020-00761-2" - licence: "MIT License" + licence: ["MIT"] input: - meta: type: map @@ -27,7 +27,7 @@ input: Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - extract: - type: file(s) + type: file description: extract file(s) from Somalier extract pattern: "*.somalier" - ped: diff --git a/modules/nf-core/somalier/relate/somalier-relate.diff b/modules/nf-core/somalier/relate/somalier-relate.diff new file mode 100644 index 00000000..f583d612 --- /dev/null +++ b/modules/nf-core/somalier/relate/somalier-relate.diff @@ -0,0 +1,25 @@ +Changes in module 'nf-core/somalier/relate' +--- modules/nf-core/somalier/relate/main.nf ++++ modules/nf-core/somalier/relate/main.nf +@@ -5,8 +5,8 @@ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +- 'https://depot.galaxyproject.org/singularity/somalier:0.2.19--h0c29559_0': +- 'biocontainers/somalier:0.2.19--h0c29559_0' }" ++ 'https://depot.galaxyproject.org/singularity/somalier:0.2.18--hb57907c_0': ++ 'biocontainers/somalier:0.2.18--hb57907c_0' }" + + input: + tuple val(meta), path(extract), path(ped) + +--- modules/nf-core/somalier/relate/environment.yml ++++ modules/nf-core/somalier/relate/environment.yml +@@ -4,4 +4,4 @@ + - bioconda + - defaults + dependencies: +- - bioconda::somalier=0.2.19 ++ - bioconda::somalier=0.2.18 + +************************************************************ diff --git a/modules/nf-core/somalier/relate/tests/main.nf.test b/modules/nf-core/somalier/relate/tests/main.nf.test new file mode 100644 index 00000000..5f17456d --- /dev/null +++ b/modules/nf-core/somalier/relate/tests/main.nf.test @@ -0,0 +1,124 @@ +nextflow_process { + + name "Test Process SOMALIER_RELATE" + script "../main.nf" + process "SOMALIER_RELATE" + + tag "modules" + tag "modules_nfcore" + tag "somalier" + tag "somalier/relate" + + test("[ delete_me, [] ], []") { + + when { + process { + """ + input[0] = [ + [ id:'cohort', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true) + ], + [] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("[ delete_me, ped ], groups") { + + when { + process { + """ + input[0] = [ + [ id:'cohort', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true), + file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true) + ], + file(params.modules_testdata_base_path + 'delete_me/somalier/family.ped', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'delete_me/somalier/groups.txt', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("[ delete_me, [] ], [] -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'cohort', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true), + file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true) + ], + [] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("[ delete_me, ped ], groups -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'cohort', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true), + file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true) + ], + file(params.modules_testdata_base_path + 'delete_me/somalier/family.ped', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 
'delete_me/somalier/groups.txt', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/somalier/relate/tests/main.nf.test.snap b/modules/nf-core/somalier/relate/tests/main.nf.test.snap new file mode 100644 index 00000000..54a73033 --- /dev/null +++ b/modules/nf-core/somalier/relate/tests/main.nf.test.snap @@ -0,0 +1,286 @@ +{ + "[ delete_me, [] ], []": { + "content": [ + { + "0": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,03cac9b2c67a8a06f63e07f83ee11e18" + ] + ], + "1": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,54d1e9fca1bf9d747d4254c6fa98edcf" + ] + ], + "2": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,97257d88886db1325c4d7d10cefa7169" + ] + ], + "3": [ + "versions.yml:md5,59d805a9f89558414535c136c814bea6" + ], + "html": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,03cac9b2c67a8a06f63e07f83ee11e18" + ] + ], + "pairs_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,54d1e9fca1bf9d747d4254c6fa98edcf" + ] + ], + "samples_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,97257d88886db1325c4d7d10cefa7169" + ] + ], + "versions": [ + "versions.yml:md5,59d805a9f89558414535c136c814bea6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-02T05:29:21.162582556" + }, + "[ delete_me, [] ], [] -stub": { + "content": [ + { + "0": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "cohort", + "single_end": false + }, + 
"cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,59d805a9f89558414535c136c814bea6" + ], + "html": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pairs_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "samples_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,59d805a9f89558414535c136c814bea6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-02T05:29:43.887124223" + }, + "[ delete_me, ped ], groups -stub": { + "content": [ + { + "0": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,59d805a9f89558414535c136c814bea6" + ], + "html": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pairs_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "samples_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,59d805a9f89558414535c136c814bea6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-02T05:29:55.034913513" + }, + "[ delete_me, ped ], groups": { + "content": [ + { + "0": [ + [ + { + "id": "cohort", + "single_end": false + }, + 
"cohort.html:md5,0d573016c9279ccdfdcfd4eb01d73b89" + ] + ], + "1": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,8655714f1e5359329188e9f501168131" + ] + ], + "2": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,38ef93340e55fbeef47640abda9e48b0" + ] + ], + "3": [ + "versions.yml:md5,59d805a9f89558414535c136c814bea6" + ], + "html": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,0d573016c9279ccdfdcfd4eb01d73b89" + ] + ], + "pairs_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,8655714f1e5359329188e9f501168131" + ] + ], + "samples_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,38ef93340e55fbeef47640abda9e48b0" + ] + ], + "versions": [ + "versions.yml:md5,59d805a9f89558414535c136c814bea6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-02T05:29:32.451456985" + } +} \ No newline at end of file diff --git a/modules/nf-core/somalier/relate/tests/tags.yml b/modules/nf-core/somalier/relate/tests/tags.yml new file mode 100644 index 00000000..c80de730 --- /dev/null +++ b/modules/nf-core/somalier/relate/tests/tags.yml @@ -0,0 +1,2 @@ +somalier/relate: + - "modules/nf-core/somalier/relate/**" diff --git a/modules/nf-core/splitubam/environment.yml b/modules/nf-core/splitubam/environment.yml new file mode 100644 index 00000000..ef128202 --- /dev/null +++ b/modules/nf-core/splitubam/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "splitubam" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::splitubam=0.1.1" diff --git a/modules/nf-core/splitubam/main.nf b/modules/nf-core/splitubam/main.nf new file mode 100644 index 00000000..3d413d69 --- /dev/null +++ b/modules/nf-core/splitubam/main.nf @@ -0,0 +1,53 @@ +process 
SPLITUBAM { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/splitubam:0.1.1--hc9368f3_0': + 'biocontainers/splitubam:0.1.1--hc9368f3_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + splitubam \\ + $args \\ + --threads $task.cpus \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + splitubam: \$(splitubam --version | sed 's/splitubam //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def match = (args =~ /--split\s+(\d+)/) + def create_cmd = "" + if (match) { + def n_splits = match[0][1].toInteger() + (1..n_splits).each { i -> + def formattedIteration = String.format('%03d', i) + create_cmd += "touch ${formattedIteration}.${bam}.bam\n" + } + } else { error("No `--split N` detected in args") } + """ + $create_cmd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + splitubam: \$(splitubam --version | sed 's/splitubam //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/splitubam/meta.yml b/modules/nf-core/splitubam/meta.yml new file mode 100644 index 00000000..3d11eaae --- /dev/null +++ b/modules/nf-core/splitubam/meta.yml @@ -0,0 +1,46 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "splitubam" +description: split one ubam into multiple, per line, fast +keywords: + - long-read + - bam + - genomics +tools: + - "splitubam": + description: "Split one ubam into multiple, per line, fast" + homepage: "https://github.com/fellen31/splitubam" + documentation: "https://github.com/fellen31/splitubam" + tool_dev_url: "https://github.com/fellen31/splitubam" + licence: 
["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:true ]` + - bam: + type: file + description: (u)BAM file + pattern: "*.{bam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Split (u)BAM file + pattern: "*.{bam}" + +authors: + - "@fellen31" +maintainers: + - "@fellen31" diff --git a/modules/nf-core/splitubam/tests/main.nf.test b/modules/nf-core/splitubam/tests/main.nf.test new file mode 100644 index 00000000..270df28b --- /dev/null +++ b/modules/nf-core/splitubam/tests/main.nf.test @@ -0,0 +1,67 @@ + +nextflow_process { + + name "Test Process SPLITUBAM" + script "../main.nf" + process "SPLITUBAM" + + tag "modules" + tag "modules_nfcore" + tag "splitubam" + + test("sarscov2 - bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam.get(0).get(1).get(0)).getHeader(), + bam(process.out.bam.get(0).get(1).get(0)).getReadsMD5(), + bam(process.out.bam.get(0).get(1).get(1)).getHeader(), + bam(process.out.bam.get(0).get(1).get(1)).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/splitubam/tests/main.nf.test.snap b/modules/nf-core/splitubam/tests/main.nf.test.snap new file mode 100644 index 00000000..df573efa --- /dev/null +++ b/modules/nf-core/splitubam/tests/main.nf.test.snap @@ -0,0 +1,73 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "001.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "002.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,78006e47ec8ddb5d6f098dcef4a3e099" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "001.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "002.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,78006e47ec8ddb5d6f098dcef4a3e099" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T13:07:19.115592832" + }, + "sarscov2 - bam": { + "content": [ + [ + "@HD\tVN:1.6\tSO:unsorted", + "@SQ\tSN:MT192765.1\tLN:29829", + "@RG\tID:1\tLB:lib1\tPL:ILLUMINA\tSM:test\tPU:barcode1", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.17-r941\tCL:minimap2 -ax sr tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna tests/data/fastq/dna/sarscov2_1.fastq.gz tests/data/fastq/dna/sarscov2_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.11\tCL:samtools view -Sb sarscov2_aln.sam", + "@PG\tID:splitbam\tPN:splitbam\tVN:0.1.0\tCL:splitubam --split 2 --threads 2 test.paired_end.bam" + ], + "4933fd727ab0ca4e215dddee7de73a2c", + [ + "@HD\tVN:1.6\tSO:unsorted", + "@SQ\tSN:MT192765.1\tLN:29829", + "@RG\tID:1\tLB:lib1\tPL:ILLUMINA\tSM:test\tPU:barcode1", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.17-r941\tCL:minimap2 -ax sr tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna tests/data/fastq/dna/sarscov2_1.fastq.gz 
tests/data/fastq/dna/sarscov2_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.11\tCL:samtools view -Sb sarscov2_aln.sam", + "@PG\tID:splitbam\tPN:splitbam\tVN:0.1.0\tCL:splitubam --split 2 --threads 2 test.paired_end.bam" + ], + "1a9e3bfa97c43dcbeba1ed01e51a6a54", + [ + "versions.yml:md5,e5c9bb35328e8dcde2e934d9e6729fa6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T13:07:07.013916943" + } +} \ No newline at end of file diff --git a/modules/nf-core/splitubam/tests/nextflow.config b/modules/nf-core/splitubam/tests/nextflow.config new file mode 100644 index 00000000..191f4bfb --- /dev/null +++ b/modules/nf-core/splitubam/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'SPLITUBAM' { + ext.args = '--split 2' + } +} diff --git a/modules/nf-core/splitubam/tests/tags.yml b/modules/nf-core/splitubam/tests/tags.yml new file mode 100644 index 00000000..3a60af57 --- /dev/null +++ b/modules/nf-core/splitubam/tests/tags.yml @@ -0,0 +1,2 @@ +splitubam: + - "modules/nf-core/splitubam/**" diff --git a/modules/nf-core/stranger/environment.yml b/modules/nf-core/stranger/environment.yml new file mode 100644 index 00000000..530ce375 --- /dev/null +++ b/modules/nf-core/stranger/environment.yml @@ -0,0 +1,7 @@ +name: stranger +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::stranger=0.9.1 diff --git a/modules/nf-core/stranger/main.nf b/modules/nf-core/stranger/main.nf new file mode 100644 index 00000000..923483f4 --- /dev/null +++ b/modules/nf-core/stranger/main.nf @@ -0,0 +1,47 @@ +process STRANGER { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/stranger:0.9.1--pyh7e72e81_0': + 'biocontainers/stranger:0.9.1--pyh7e72e81_0' }" + + input: + tuple val(meta), path(vcf) + tuple val(meta2), path(variant_catalog) + + output: + tuple val(meta), path("*.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def options_variant_catalog = variant_catalog ? "--repeats-file $variant_catalog" : "" + """ + stranger \\ + $args \\ + $vcf \\ + $options_variant_catalog | gzip --no-name > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stranger: \$( stranger --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stranger: \$( stranger --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/stranger/meta.yml b/modules/nf-core/stranger/meta.yml new file mode 100644 index 00000000..5e0bc0bb --- /dev/null +++ b/modules/nf-core/stranger/meta.yml @@ -0,0 +1,52 @@ +name: stranger +description: Annotates output files from ExpansionHunter with the pathologic implications of the repeat sizes. +keywords: + - STR + - repeat_expansions + - annotate + - vcf +tools: + - stranger: + description: Annotate VCF files with str variants + homepage: https://github.com/moonso/stranger + documentation: https://github.com/moonso/stranger + tool_dev_url: https://github.com/moonso/stranger + doi: "10.5281/zenodo.4548873" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF with repeat expansions + pattern: "*.{vcf.gz,vcf}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - variant_catalog: + type: file + description: json file with repeat expansion sites to genotype + pattern: "*.{json}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: annotated VCF with keys STR_STATUS, NormalMax and PathologicMin + pattern: "*.{vcf.gz}" +authors: + - "@ljmesi" +maintainers: + - "@ljmesi" diff --git a/modules/nf-core/stranger/tests/main.nf.test b/modules/nf-core/stranger/tests/main.nf.test new file mode 100644 index 00000000..13c61f43 --- /dev/null +++ b/modules/nf-core/stranger/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_process { + + name "Test Process STRANGER" + script "../main.nf" + process "STRANGER" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "stranger" + tag "expansionhunter" + + setup { + run("EXPANSIONHUNTER") { + script "../../expansionhunter/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [id:'fasta'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [id:'fai'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [id:'catalogue'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/loci/repeat_expansions.json', checkIfExists: true) + ] + """ + } + } + } + + test("homo_sapiens - vcf, repeat_catalogue") { + + when { + process { + """ + input[0] = EXPANSIONHUNTER.out.vcf + input[1] = [ + 
[id:'catalogue'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/loci/repeat_expansions.json', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + + test("homo_sapiens - vcf, repeat_catalogue - stub") { + + options "-stub" + + when { + process { + """ + input[0] = EXPANSIONHUNTER.out.vcf + input[1] = [ + [id:'catalogue'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/loci/repeat_expansions.json', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/modules/nf-core/stranger/tests/main.nf.test.snap b/modules/nf-core/stranger/tests/main.nf.test.snap new file mode 100644 index 00000000..3faa4e58 --- /dev/null +++ b/modules/nf-core/stranger/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "homo_sapiens - vcf, repeat_catalogue - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "stranger.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,68446c9e91e20825f7430db64e85aa7c" + ], + "vcf": [ + [ + { + "id": "test" + }, + "stranger.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,68446c9e91e20825f7430db64e85aa7c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T08:09:27.725425094" + }, + "homo_sapiens - vcf, repeat_catalogue": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "stranger.vcf.gz:md5,e933cd71d9f9f146265b8e8dd90b712c" + ] + ], + "1": [ + "versions.yml:md5,68446c9e91e20825f7430db64e85aa7c" + ], + "vcf": [ + [ + { + "id": "test" + }, + "stranger.vcf.gz:md5,e933cd71d9f9f146265b8e8dd90b712c" + ] + ], + "versions": [ + "versions.yml:md5,68446c9e91e20825f7430db64e85aa7c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T08:09:14.472394433" + } +} \ No newline at 
end of file diff --git a/modules/nf-core/stranger/tests/nextflow.config b/modules/nf-core/stranger/tests/nextflow.config new file mode 100644 index 00000000..98459330 --- /dev/null +++ b/modules/nf-core/stranger/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'STRANGER' { + ext.prefix = "stranger" + } + } diff --git a/modules/nf-core/stranger/tests/tags.yml b/modules/nf-core/stranger/tests/tags.yml new file mode 100644 index 00000000..6d11ef42 --- /dev/null +++ b/modules/nf-core/stranger/tests/tags.yml @@ -0,0 +1,2 @@ +stranger: + - "modules/nf-core/stranger/**" diff --git a/modules/nf-core/tabix/bgzip/environment.yml b/modules/nf-core/tabix/bgzip/environment.yml new file mode 100644 index 00000000..56cc0fb1 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/environment.yml @@ -0,0 +1,8 @@ +name: tabix_bgzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf new file mode 100644 index 00000000..67991c74 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/main.nf @@ -0,0 +1,55 @@ +process TABIX_BGZIP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'biocontainers/htslib:1.20--h5efdd21_2' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("${output}") , emit: output + tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + extension = in_bgzip ? 
input.getBaseName().tokenize(".")[-1] : input.getExtension() + output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" + command = in_bgzip ? '-d' : '' + // Name the index according to $prefix, unless a name has been requested + if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) { + args = args + " -I ${output}.gzi" + } + """ + bgzip $command -c $args -@${task.cpus} $input > ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + output = in_bgzip ? input.getBaseName() : "${prefix}.${input.getExtension()}.gz" + + """ + echo "" | gzip > ${output} + touch ${output}.gzi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml new file mode 100644 index 00000000..621d49ea --- /dev/null +++ b/modules/nf-core/tabix/bgzip/meta.yml @@ -0,0 +1,52 @@ +name: tabix_bgzip +description: Compresses/decompresses files +keywords: + - compress + - decompress + - bgzip + - tabix +tools: + - bgzip: + description: | + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. + homepage: https://www.htslib.org/doc/tabix.html + documentation: http://www.htslib.org/doc/bgzip.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: file to compress or to decompress +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - output: + type: file + description: Output compressed/decompressed file + pattern: "*." + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config new file mode 100644 index 00000000..6b6ff55f --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = ' -i' + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test b/modules/nf-core/tabix/bgzip/tests/main.nf.test new file mode 100644 index 00000000..d784aa07 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process TABIX_BGZIP" + script "modules/nf-core/tabix/bgzip/main.nf" + process "TABIX_BGZIP" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgzip" + + test("sarscov2_vcf_bgzip_compress") { + when { + process { + """ + input[0] = [ + [ id:'bgzip_test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bgzip_test") + } + ) + } + } + + test("homo_genome_bedgz_compress") { + when { + process { + """ + input[0] = [ + [ id:'bedgz_test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert 
snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bedgz_test") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_stub") { + options '-stub' + config "./bgzip_compress.config" + + when { + process { + """ + input[0] = [ + [ id:"test_stub" ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("test_stub") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_gzi") { + config "./bgzip_compress.config" + when { + process { + """ + input[0] = [ + [ id:"gzi_compress_test" ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gzi[0][1]).name + ).match("gzi_compress_test") + } + ) + } + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap new file mode 100644 index 00000000..0748143f --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap @@ -0,0 +1,218 @@ +{ + "gzi_compress_test": { + "content": [ + "gzi_compress_test.vcf.gz.gzi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:29.328146" + }, + "homo_genome_bedgz_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + 
"versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:34.159992362" + }, + "test_stub": { + "content": [ + "test_stub.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:20.811489" + }, + "sarscov2_vcf_bgzip_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:22.087769106" + }, + "sarscov2_vcf_bgzip_compress_gzi": { + "content": [ + { + "0": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "output": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:57.15091665" + }, + "bgzip_test": { + "content": [ + "bgzip_test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:03.768295" + }, + "bedgz_test": { + "content": [ + "bedgz_test.bed" + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:12.453855" + }, + "sarscov2_vcf_bgzip_compress_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:45.219404786" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgzip/tests/tags.yml b/modules/nf-core/tabix/bgzip/tests/tags.yml new file mode 100644 index 00000000..de0eec86 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/bgzip: + - "modules/nf-core/tabix/bgzip/**" diff --git a/modules/nf-core/tabix/bgzip/tests/vcf_none.config b/modules/nf-core/tabix/bgzip/tests/vcf_none.config new file mode 100644 index 00000000..f3a3c467 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/vcf_none.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = '' + } +} diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml index 30128876..b9644de9 100644 --- a/modules/nf-core/tabix/bgziptabix/environment.yml +++ b/modules/nf-core/tabix/bgziptabix/environment.yml @@ -1,10 +1,8 @@ name: tabix_bgziptabix - channels: - conda-forge - bioconda - defaults - dependencies: - - bioconda::htslib=1.19.1 - bioconda::tabix=1.11 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/tabix/bgziptabix/main.nf 
b/modules/nf-core/tabix/bgziptabix/main.nf index bcdcf2a6..05041f49 100644 --- a/modules/nf-core/tabix/bgziptabix/main.nf +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -4,8 +4,8 @@ process TABIX_BGZIPTABIX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' : - 'biocontainers/htslib:1.19.1--h81da01d_1' }" + 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'biocontainers/htslib:1.20--h5efdd21_2' }" input: tuple val(meta), path(input) @@ -24,7 +24,7 @@ process TABIX_BGZIPTABIX { def prefix = task.ext.prefix ?: "${meta.id}" """ bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz - tabix $args2 ${prefix}.${input.getExtension()}.gz + tabix --threads ${task.cpus} $args2 ${prefix}.${input.getExtension()}.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test index 87ea2c84..1a84d74f 100644 --- a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test +++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test @@ -17,7 +17,7 @@ nextflow_process { """ input[0] = [ [ id:'tbi_test' ], - [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] ] """ } @@ -43,7 +43,7 @@ nextflow_process { """ input[0] = [ [ id:'csi_test' ], - [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] ] """ } @@ -72,7 +72,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] + [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] ] """ } diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap index fcecb2e4..c166ea72 100644 --- a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap @@ -8,14 +8,14 @@ "id": "tbi_test" }, "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", - "tbi_test.bed.gz.tbi:md5,24908545311cf2b7c803c41d716872c4" + "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c" ] ], "1": [ ], "2": [ - "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" ], "gz_csi": [ @@ -26,15 +26,19 @@ "id": "tbi_test" }, "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", - "tbi_test.bed.gz.tbi:md5,24908545311cf2b7c803c41d716872c4" + "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c" ] ], "versions": [ - "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" ] } ], - "timestamp": "2024-02-19T14:50:51.513838" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:29:16.053817543" }, "sarscov2_bed_csi": { "content": [ @@ -48,11 +52,11 @@ "id": "csi_test" }, "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", - "csi_test.bed.gz.csi:md5,e06165ddd34640783728cb07f2558b43" + "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5" ] ], "2": [ - "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" ], "gz_csi": [ [ @@ -60,35 +64,51 @@ "id": "csi_test" }, "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", - "csi_test.bed.gz.csi:md5,e06165ddd34640783728cb07f2558b43" + "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5" ] ], "gz_tbi": [ ], "versions": [ - "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + 
"versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" ] } ], - "timestamp": "2024-02-19T14:51:00.513777" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:29:27.667745444" }, "csi_test": { "content": [ "csi_test.bed.gz" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-02-19T14:51:00.548801" }, "csi_stub": { "content": [ "test.bed.gz" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-02-19T14:51:09.218454" }, "tbi_test": { "content": [ "tbi_test.bed.gz" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-02-19T14:50:51.579654" }, "sarscov2_bed_csi_stub": { @@ -113,7 +133,7 @@ ] ], "2": [ - "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" ], "gz_csi": [ [ @@ -134,10 +154,14 @@ ] ], "versions": [ - "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" ] } ], - "timestamp": "2024-02-19T14:51:09.164254" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:29:45.105209991" } } \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/environment.yml b/modules/nf-core/tabix/tabix/environment.yml index a07ad9d1..4d1f9dd4 100644 --- a/modules/nf-core/tabix/tabix/environment.yml +++ b/modules/nf-core/tabix/tabix/environment.yml @@ -6,5 +6,5 @@ channels: - defaults dependencies: - - bioconda::htslib=1.19.1 + - bioconda::htslib=1.20 - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index 1737141d..13acd670 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -4,8 +4,8 @@ process TABIX_TABIX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' : - 'biocontainers/htslib:1.19.1--h81da01d_1' }" + 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'biocontainers/htslib:1.20--h5efdd21_2' }" input: tuple val(meta), path(tab) @@ -21,7 +21,10 @@ process TABIX_TABIX { script: def args = task.ext.args ?: '' """ - tabix $args $tab + tabix \\ + --threads $task.cpus \\ + $args \\ + $tab cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,8 +36,8 @@ process TABIX_TABIX { """ touch ${tab}.tbi touch ${tab}.csi - cat <<-END_VERSIONS > versions.yml + cat <<-END_VERSIONS > versions.yml "${task.process}": tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index 3a150c70..102b0d7b 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_process { """ input[0] = [ [ id:'tbi_bed' ], - [ file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) ] ] """ } @@ -25,11 +25,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, { assert snapshot( - file(process.out.tbi[0][1]).name - ).match("tbi_bed") - } + process.out, + file(process.out.tbi[0][1]).name + ).match() } ) } } @@ -41,7 +40,7 @@ nextflow_process { """ input[0] = [ [ id:'tbi_gff' ], - [ file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true) ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true) ] ] """ } @@ -50,11 +49,9 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, { assert snapshot( - 
file(process.out.tbi[0][1]).name - ).match("tbi_gff") - } + process.out, + file(process.out.tbi[0][1]).name).match() } ) } @@ -67,7 +64,7 @@ nextflow_process { """ input[0] = [ [ id:'tbi_vcf' ], - [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] ] """ } @@ -76,11 +73,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, { assert snapshot( - file(process.out.tbi[0][1]).name - ).match("tbi_vcf") - } + process.out, + file(process.out.tbi[0][1]).name + ).match() } ) } @@ -93,7 +89,7 @@ nextflow_process { """ input[0] = [ [ id:'vcf_csi' ], - [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] ] """ } @@ -102,11 +98,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, { assert snapshot( - file(process.out.csi[0][1]).name - ).match("vcf_csi") - } + process.out, + file(process.out.csi[0][1]).name + ).match() } ) } @@ -120,7 +115,7 @@ nextflow_process { """ input[0] = [ [ id:'vcf_csi_stub' ], - [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] ] """ } @@ -129,11 +124,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, { assert snapshot( - file(process.out.csi[0][1]).name - ).match("vcf_csi_stub") - } + process.out, + file(process.out.csi[0][1]).name + ).match() } ) } diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index 034e38b6..c2b9ed0b 100644 --- 
a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -1,16 +1,4 @@ { - "vcf_csi_stub": { - "content": [ - "test.vcf.gz.csi" - ], - "timestamp": "2024-03-04T14:51:59.788002" - }, - "tbi_gff": { - "content": [ - "genome.gff3.gz.tbi" - ], - "timestamp": "2024-02-19T14:53:37.420216" - }, "sarscov2_gff_tbi": { "content": [ { @@ -19,14 +7,14 @@ { "id": "tbi_gff" }, - "genome.gff3.gz.tbi:md5,53fc683fd217aae47ef10d23c52a9178" + "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926" ] ], "1": [ ], "2": [ - "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + "versions.yml:md5,07064637fb8a217174052be8e40234e2" ], "csi": [ @@ -36,15 +24,20 @@ { "id": "tbi_gff" }, - "genome.gff3.gz.tbi:md5,53fc683fd217aae47ef10d23c52a9178" + "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926" ] ], "versions": [ - "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + "versions.yml:md5,07064637fb8a217174052be8e40234e2" ] - } + }, + "genome.gff3.gz.tbi" ], - "timestamp": "2024-02-19T14:53:37.388157" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:25.653807564" }, "sarscov2_bedgz_tbi": { "content": [ @@ -54,14 +47,14 @@ { "id": "tbi_bed" }, - "test.bed.gz.tbi:md5,0f17d85e7f0a042b2aa367b70df224f8" + "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d" ] ], "1": [ ], "2": [ - "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + "versions.yml:md5,07064637fb8a217174052be8e40234e2" ], "csi": [ @@ -71,27 +64,20 @@ { "id": "tbi_bed" }, - "test.bed.gz.tbi:md5,0f17d85e7f0a042b2aa367b70df224f8" + "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d" ] ], "versions": [ - "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + "versions.yml:md5,07064637fb8a217174052be8e40234e2" ] - } - ], - "timestamp": "2024-02-19T14:53:28.879408" - }, - "tbi_vcf": { - "content": [ - "test.vcf.gz.tbi" - ], - "timestamp": "2024-02-19T14:53:46.402522" - }, - "vcf_csi": { - "content": [ - "test.vcf.gz.csi" + 
}, + "test.bed.gz.tbi" ], - "timestamp": "2024-02-19T14:53:54.921189" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:09.754082161" }, "sarscov2_vcf_tbi": { "content": [ @@ -101,14 +87,14 @@ { "id": "tbi_vcf" }, - "test.vcf.gz.tbi:md5,897f3f378a811b90e6dee56ce08d2bcf" + "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" ] ], "1": [ ], "2": [ - "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + "versions.yml:md5,07064637fb8a217174052be8e40234e2" ], "csi": [ @@ -118,15 +104,20 @@ { "id": "tbi_vcf" }, - "test.vcf.gz.tbi:md5,897f3f378a811b90e6dee56ce08d2bcf" + "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" ] ], "versions": [ - "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + "versions.yml:md5,07064637fb8a217174052be8e40234e2" ] - } + }, + "test.vcf.gz.tbi" ], - "timestamp": "2024-02-19T14:53:46.370358" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:40.042648294" }, "sarscov2_vcf_csi_stub": { "content": [ @@ -148,7 +139,7 @@ ] ], "2": [ - "versions.yml:md5,3d45df6d80883bad358631069a2940fd" + "versions.yml:md5,07064637fb8a217174052be8e40234e2" ], "csi": [ [ @@ -167,11 +158,16 @@ ] ], "versions": [ - "versions.yml:md5,3d45df6d80883bad358631069a2940fd" + "versions.yml:md5,07064637fb8a217174052be8e40234e2" ] - } + }, + "test.vcf.gz.csi" ], - "timestamp": "2024-03-04T14:51:59.766184" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:07:08.700367261" }, "sarscov2_vcf_csi": { "content": [ @@ -184,34 +180,33 @@ { "id": "vcf_csi" }, - "test.vcf.gz.csi:md5,0731ad6f40104d2bbb1a2cc478ef8f03" + "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b" ] ], "2": [ - "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + "versions.yml:md5,07064637fb8a217174052be8e40234e2" ], "csi": [ [ { "id": "vcf_csi" }, - "test.vcf.gz.csi:md5,0731ad6f40104d2bbb1a2cc478ef8f03" + "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b" ] ], "tbi": [ 
], "versions": [ - "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + "versions.yml:md5,07064637fb8a217174052be8e40234e2" ] - } - ], - "timestamp": "2024-02-19T14:53:54.886876" - }, - "tbi_bed": { - "content": [ - "test.bed.gz.tbi" + }, + "test.vcf.gz.csi" ], - "timestamp": "2024-02-19T14:53:28.947628" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:55.362067748" } } \ No newline at end of file diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml index 0c9cbb10..4f498244 100644 --- a/modules/nf-core/untar/environment.yml +++ b/modules/nf-core/untar/environment.yml @@ -1,11 +1,9 @@ name: untar - channels: - conda-forge - bioconda - defaults - dependencies: - conda-forge::grep=3.11 - - conda-forge::sed=4.7 + - conda-forge::sed=4.8 - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 8a75bb95..9bd8f554 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -4,8 +4,8 @@ process UNTAR { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" input: tuple val(meta), path(archive) @@ -52,8 +52,29 @@ process UNTAR { stub: prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) """ - mkdir $prefix - touch ${prefix}/file.txt + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test index 2a7c97bf..c957517a 100644 --- a/modules/nf-core/untar/tests/main.nf.test +++ b/modules/nf-core/untar/tests/main.nf.test @@ -6,6 +6,7 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "untar" + test("test_untar") { when { @@ -19,10 +20,9 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.untar).match("test_untar") }, + { assert snapshot(process.out).match() }, ) } - } test("test_untar_onlyfiles") { @@ -38,10 +38,48 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, + { assert snapshot(process.out).match() }, ) } + } + + test("test_untar - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } } + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + 
then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } } diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap index 64550292..ceb91b79 100644 --- a/modules/nf-core/untar/tests/main.nf.test.snap +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -1,42 +1,158 @@ { "test_untar_onlyfiles": { "content": [ - [ - [ + { + "0": [ [ - - ], + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ [ - "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" ] - ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-28T11:49:41.320643" + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + 
"versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" }, "test_untar": { "content": [ - [ - [ + { + "0": [ [ - - ], + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ [ - "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", - "opts.k2d:md5,a033d00cf6759407010b21700938f543", - "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" ] - ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-28T11:49:33.795172" + "timestamp": "2024-07-10T12:04:19.377674" } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 1806b025..44e6630e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,24 +12,33 @@ params { // Input options input = null bed = null - dipcall_par = null - extra_snfs = null - extra_gvcfs = null + cadd_resources = null + cadd_prescored = null + par_regions = null tandem_repeats = null trgt_repeats = null + variant_catalog = null + reduced_penetrance = null + score_config_snv = null snp_db = null + variant_consequences_snv = null vep_cache = null + vep_plugin_files = null hificnv_xy = null hificnv_xx = null hificnv_exclude = null somalier_sites = null + deepvariant_model_type = params.preset == 'ONT_R10' ? 'ONT_R104' : 'PACBIO' // Skip parts of the workflow + skip_aligned_read_qc = false skip_raw_read_qc = false - skip_assembly_wf = false + skip_assembly_wf = params.preset == 'ONT_R10' ? 
true : false skip_mapping_wf = false - skip_methylation_wf = false - skip_repeat_wf = false + skip_methylation_wf = params.preset == 'pacbio' ? true : false + skip_rank_variants = false + skip_repeat_calling = params.preset == 'ONT_R10' ? true : false + skip_repeat_annotation = params.preset == 'ONT_R10' ? true : false skip_phasing_wf = false skip_short_variant_calling = false skip_snv_annotation = false @@ -45,7 +54,7 @@ params { // Preprocessing/parallelisation parallel_snv = 13 - split_fastq = 0 + parallel_alignments = 1 // References fasta = null @@ -279,23 +288,28 @@ manifest { description = """Long-read variant calling pipeline""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.2.0' + version = '0.3.0' doi = '' } // Load modules.config for DSL2 module specific options includeConfig 'conf/base.config' +includeConfig 'conf/modules/annotate_cadd.config' +includeConfig 'conf/modules/annotate_consequence_pli.config' +includeConfig 'conf/modules/annotate_repeat_expansions.config' includeConfig 'conf/modules/general.config' includeConfig 'conf/modules/bam_infer_sex.config' -includeConfig 'conf/modules/bam_to_fastq.config' includeConfig 'conf/modules/call_paralogs.config' +includeConfig 'conf/modules/call_repeat_expansions.config' +includeConfig 'conf/modules/convert_input_files.config' includeConfig 'conf/modules/assembly_variant_calling.config' includeConfig 'conf/modules/genome_assembly.config' includeConfig 'conf/modules/methylation.config' includeConfig 'conf/modules/phasing.config' includeConfig 'conf/modules/prepare_genome.config' -includeConfig 'conf/modules/qc.config' -includeConfig 'conf/modules/repeat_calling.config' +includeConfig 'conf/modules/qc_aligned_reads.config' +includeConfig 'conf/modules/scatter_genome.config' +includeConfig 'conf/modules/rank_variants.config' includeConfig 'conf/modules/short_variant_calling.config' includeConfig 'conf/modules/snv_annotation.config' includeConfig 
'conf/modules/structural_variant_calling.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index f0601054..8e83ba04 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -8,55 +8,82 @@ "workflow_skip_options": { "title": "Workflow skip options", "type": "object", - "description": "Options to skip various steps within the workflow", + "description": "Allows skipping certain parts of the pipeline", "default": "", "properties": { + "skip_aligned_read_qc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip QC of aligned reads", + "default": false + }, "skip_raw_read_qc": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip raw read QC" + "description": "Skip QC of unaligned (raw) reads", + "default": false }, "skip_short_variant_calling": { "type": "boolean", "description": "Skip short variant calling", - "fa_icon": "fas fa-fast-forward" + "fa_icon": "fas fa-fast-forward", + "default": false }, "skip_assembly_wf": { "type": "boolean", - "description": "Skip assembly and downstream processes", - "fa_icon": "fas fa-fast-forward" + "description": "Skip genome assembly and assembly variant calling", + "fa_icon": "fas fa-fast-forward", + "default": false }, "skip_mapping_wf": { "type": "boolean", - "description": "Skip read mapping and downstream processes", - "fa_icon": "fas fa-fast-forward" + "description": "Skip read mapping (alignment)", + "fa_icon": "fas fa-fast-forward", + "default": false }, "skip_methylation_wf": { "type": "boolean", - "description": "Skip methylation workflow", - "fa_icon": "fas fa-fast-forward" + "description": "Skip generation of methylation pileups", + "fa_icon": "fas fa-fast-forward", + "default": false }, - "skip_repeat_wf": { + "skip_repeat_calling": { "type": "boolean", - "description": "Skip repeat analysis workflow", - "fa_icon": "fas fa-fast-forward" + "description": "Skip tandem repeat calling", + "fa_icon": "fas fa-fast-forward", + "default": false + }, + 
"skip_repeat_annotation": { + "type": "boolean", + "description": "Skip tandem repeat annotation", + "fa_icon": "fas fa-fast-forward", + "default": false }, "skip_phasing_wf": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip phasing workflow" + "description": "Skip phasing of variants and haplotagging of reads", + "default": false }, "skip_snv_annotation": { "type": "boolean", - "description": "Skip SNV annotation" + "description": "Skip short variant annotation", + "default": false }, "skip_cnv_calling": { "type": "boolean", - "description": "Skip CNV workflow" + "description": "Skip CNV calling", + "default": false }, "skip_call_paralogs": { "type": "boolean", - "description": "Skip call paralogs (Paraphase)" + "description": "Skip the calling of specific paralogous genes", + "default": false + }, + "skip_rank_variants": { + "type": "boolean", + "description": "Skip ranking of short variants", + "default": false } }, "fa_icon": "fas fa-american-sign-language-interpreting" @@ -325,92 +352,105 @@ "workflow_options": { "title": "Workflow options", "type": "object", - "description": "", + "description": "Workflow options specific to genomic-medicine-sweden/nallo", "default": "", "properties": { "preset": { "type": "string", "default": "revio", - "description": "Choose a preset depending on data type", + "description": "Enable or disable certain parts of the pipeline by default, depending on data type (`revio`, `pacbio`, `ONT_R10`)", "enum": ["revio", "pacbio", "ONT_R10"] }, "variant_caller": { "type": "string", "default": "deepvariant", - "description": "Choose variant caller", + "description": "Which short variant software to use (`deepvariant`)", "enum": ["deepvariant"] }, "phaser": { "type": "string", "default": "whatshap", - "description": "Choose phasing software", + "description": "Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`)", "enum": ["whatshap", "hiphase_snv", "hiphase_sv"] }, "hifiasm_mode": { "type": 
"string", - "description": "Run hifiasm in hifi-only or hifi-trio mode", + "description": "Run hifiasm in hifi-only or hifi-trio mode (`hifi-only`, `trio-binning`)", "enum": ["hifi-only", "trio-binning"], "default": "hifi-only" }, - "split_fastq": { + "parallel_alignments": { "type": "integer", - "default": 0, - "minimum": 0, - "description": "Split alignment into n processes" + "minimum": 1, + "default": 1, + "description": "If parallel_alignments is bigger than 1, input files will be split and aligned in parallel to reduce processing time." }, "parallel_snv": { "type": "integer", "default": 13, - "description": "Split SNV calling into n chunks", + "description": "If parallel_snv is bigger than 1, short variant calling will be done in parallel to reduce processing time.", "minimum": 1 }, "vep_cache_version": { "type": "integer", "default": 110, "description": "VEP cache version" + }, + "vep_plugin_files": { + "type": "string", + "mimetype": "text/csv", + "description": "A csv file with paths to vep plugin files. pLI and LoFtool are required.", + "schema": "assets/vep_plugin_files_schema.json" + }, + "deepvariant_model_type": { + "type": "string", + "default": "PACBIO", + "description": "Sets the model type used for DeepVariant. This is set automatically using `--preset` by default.", + "hidden": true, + "enum": ["PACBIO", "ONT_R104"] } }, "required": ["preset"] }, - "extra_file_inputs": { - "title": "Extra file inputs", + "file_inputs": { + "title": "File inputs", "type": "object", - "description": "Different processes may need extra input files", + "description": "The different files that are required. 
Some are only required by certain workflows, see the usage documentation.", "default": "", "fa_icon": "fas fa-copy", "properties": { - "dipcall_par": { + "cadd_prescored": { "type": "string", - "description": "Provide a bed file of chrX PAR regions for dipcall", - "format": "file-path", - "exists": true + "exists": true, + "format": "directory-path", + "fa_icon": "fas fa-file", + "description": "Path to a directory containing prescored indels for CADD.", + "help_text": "This folder contains the compressed files and indexes that would otherwise be in data/prescored folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation." }, - "extra_gvcfs": { + "cadd_resources": { "type": "string", - "description": "Extra input files for GLNexus", - "pattern": "^\\S+\\.csv$", - "format": "file-path", - "schema": "assets/schema_gvcfs.json", - "exists": true + "exists": true, + "format": "directory-path", + "fa_icon": "fas fa-file", + "description": "Path to a directory containing CADD annotations.", + "help_text": "This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation." 
}, - "extra_snfs": { + "par_regions": { "type": "string", - "description": "Extra input files for Sniffles", - "pattern": "^\\S+\\.csv$", + "description": "Provide a bed file of chrX and chrY PAR regions for dipcall and DeepVariant", "format": "file-path", - "schema": "assets/schema_snfs.json", "exists": true }, "tandem_repeats": { "type": "string", "format": "file-path", - "description": "Tandem repeat BED-file for sniffles", + "description": "A tandem repeat BED file for sniffles", "pattern": "^\\S+\\.bed$", "exists": true }, "trgt_repeats": { "type": "string", - "description": "BED-file for repeats to be genotyped", + "description": "A BED file with repeats to be genotyped with TRGT", "format": "file-path", "exists": true }, @@ -420,12 +460,24 @@ "format": "file-path", "mimetype": "text/csv", "schema": "/assets/schema_snpdb.json", - "description": "Extra echtvar-databases to annotate SNVs with", + "description": "A csv file with echtvar databases to annotate SNVs with", + "exists": true + }, + "variant_catalog": { + "type": "string", + "description": "A variant catalog json-file for stranger", + "format": "file-path", "exists": true }, + "variant_consequences_snv": { + "type": "string", + "description": "File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic SNVs.", + "help_text": "For more information check https://ensembl.org/info/genome/variation/prediction/predicted_data.html", + "fa_icon": "fas fa-file-csv" + }, "vep_cache": { "type": "string", - "description": "Path to directory of vep_cache", + "description": "A path to the VEP cache location", "format": "path", "exists": true }, @@ -433,29 +485,45 @@ "type": "string", "pattern": "^\\S+\\.bed$", "format": "file-path", - "description": "BED file with regions of interest", + "description": "A BED file with regions of interest, used to limit short variant calling.", "exists": true }, "hificnv_xy": { "type": "string", "format": "file-path", +
"description": "A BED file containing expected copy number regions for XY samples.", "exists": true }, "hificnv_xx": { "type": "string", "format": "file-path", + "description": "A BED file containing expected copy number regions for XX samples.", "exists": true }, "hificnv_exclude": { "type": "string", "format": "file-path", - "description": "HiFiCNV BED file specifying regions to exclude", + "description": "A BED file specifying regions to exclude with HiFiCNV, such as centromeres.", "exists": true }, + "reduced_penetrance": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file-csv", + "description": "A file with gene ids that have reduced penetrance. For use with genmod." + }, + "score_config_snv": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "description": "A SNV rank model config file for genmod." + }, "somalier_sites": { "type": "string", "pattern": "^\\S+\\.vcf(\\.gz)?$", - "description": "A VCF of known polymorphic sites", + "description": "A VCF of known polymorphic sites for somalier", "format": "file-path", "exists": true }, @@ -464,7 +532,7 @@ "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." }, "validationLenientMode": { "type": "boolean", @@ -506,7 +574,7 @@ "$ref": "#/definitions/workflow_options" }, { - "$ref": "#/definitions/extra_file_inputs" + "$ref": "#/definitions/file_inputs" } ] } diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..67cadb45 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,13 @@ +config { + plugins { + load "nft-bam@0.3.0" + } + stage { + copy './nextflow_schema.json' + } + testsDir "." 
+ workDir ".nf-test" + configFile "tests/nextflow.config" + profile "" + +} diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf new file mode 100644 index 00000000..9a1dc047 --- /dev/null +++ b/subworkflows/local/annotate_cadd/main.nf @@ -0,0 +1,76 @@ +// +// A subworkflow to annotate snvs +// + +include { BCFTOOLS_ANNOTATE as ANNOTATE_INDELS } from '../../../modules/nf-core/bcftools/annotate/main' +include { BCFTOOLS_ANNOTATE as RENAME_CHRNAMES } from '../../../modules/nf-core/bcftools/annotate/main' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view/main' +include { CADD } from '../../../modules/nf-core/cadd/main' +include { GAWK as REFERENCE_TO_CADD_CHRNAMES } from '../../../modules/nf-core/gawk/main' +include { GAWK as CADD_TO_REFERENCE_CHRNAMES } from '../../../modules/nf-core/gawk/main' +include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_CADD } from '../../../modules/nf-core/tabix/tabix/main' + +workflow ANNOTATE_CADD { + + take: + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_vcf // channel: [mandatory] [ val(meta), path(vcfs) ] + ch_index // channel: [mandatory] [ val(meta), path(tbis) ] + ch_header // channel: [mandatory] [ path(txt) ] + ch_cadd_resources // channel: [mandatory] [ path(dir) ] + ch_cadd_prescored // channel: [mandatory] [ path(dir) ] + + main: + ch_versions = Channel.empty() + + REFERENCE_TO_CADD_CHRNAMES ( ch_fai , [] ) + ch_versions = ch_versions.mix(REFERENCE_TO_CADD_CHRNAMES.out.versions) + + CADD_TO_REFERENCE_CHRNAMES ( ch_fai , [] ) + ch_versions = ch_versions.mix(CADD_TO_REFERENCE_CHRNAMES.out.versions) + + ch_vcf + .join(ch_index) + .map { meta, vcf, tbi -> [ meta, vcf, tbi, [], [] ] } + .set { rename_chrnames_in } + + RENAME_CHRNAMES ( + rename_chrnames_in, + [], + REFERENCE_TO_CADD_CHRNAMES.out.output.map { meta, txt -> txt } + ) + ch_versions = 
ch_versions.mix(RENAME_CHRNAMES.out.versions) + + BCFTOOLS_VIEW ( + RENAME_CHRNAMES.out.vcf.map { meta, vcf -> [ meta, vcf, [] ] }, + [], + [], + [] + ) + ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions) + + CADD ( BCFTOOLS_VIEW.out.vcf, ch_cadd_resources, ch_cadd_prescored ) + ch_versions = ch_versions.mix(CADD.out.versions) + + TABIX_CADD ( CADD.out.tsv ) + ch_versions = ch_versions.mix(TABIX_CADD.out.versions) + + RENAME_CHRNAMES.out.vcf + .join(CADD.out.tsv) + .join(TABIX_CADD.out.tbi) + .map { meta, vcf, annotations, annotations_index -> [ meta, vcf, [], annotations, annotations_index ] } + .set { ch_annotate_indels_in } + + ANNOTATE_INDELS ( + ch_annotate_indels_in, + ch_header, + CADD_TO_REFERENCE_CHRNAMES.out.output.map { meta, txt -> txt } + ) + ch_versions = ch_versions.mix(ANNOTATE_INDELS.out.versions) + + emit: + vcf = ANNOTATE_INDELS.out.vcf // channel: [ val(meta), path(vcf) ] + tbi = ANNOTATE_INDELS.out.tbi // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/annotate_consequence_pli.nf b/subworkflows/local/annotate_consequence_pli.nf new file mode 100644 index 00000000..203a1405 --- /dev/null +++ b/subworkflows/local/annotate_consequence_pli.nf @@ -0,0 +1,30 @@ +// +// A subworkflow to add most severe consequence and pli to a vep annotated vcf +// + +include { ADD_MOST_SEVERE_CSQ } from '../../modules/local/add_most_severe_consequence' +include { ADD_MOST_SEVERE_PLI } from '../../modules/local/add_most_severe_pli' +include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main' + +workflow ANNOTATE_CSQ_PLI { + take: + ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_variant_consequences // channel: [mandatory] [ path(consequences) ] + + main: + ch_versions = Channel.empty() + + ADD_MOST_SEVERE_CSQ (ch_vcf, ch_variant_consequences) + ch_versions = ch_versions.mix(ADD_MOST_SEVERE_CSQ.out.versions) + + ADD_MOST_SEVERE_PLI 
(ADD_MOST_SEVERE_CSQ.out.vcf) + ch_versions = ch_versions.mix(ADD_MOST_SEVERE_PLI.out.versions) + + TABIX_BGZIPTABIX (ADD_MOST_SEVERE_PLI.out.vcf) + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + + emit: + vcf_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> return [ meta, vcf ] } // channel: [ val(meta), path(vcf) ] + tbi_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> return [ meta, tbi ] } // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/annotate_repeat_expansions/main.nf b/subworkflows/local/annotate_repeat_expansions/main.nf new file mode 100644 index 00000000..62b138c6 --- /dev/null +++ b/subworkflows/local/annotate_repeat_expansions/main.nf @@ -0,0 +1,31 @@ +// +// Annotate repeat expansions +// + +include { BCFTOOLS_VIEW as COMPRESS_STRANGER } from '../../../modules/nf-core/bcftools/view/main' +include { STRANGER } from '../../../modules/nf-core/stranger/main' + +workflow ANNOTATE_REPEAT_EXPANSIONS { + take: + ch_variant_catalog // channel: [mandatory] [ path(variant_catalog.json) ] + ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + + main: + ch_versions = Channel.empty() + + // Annotate, compress and index + STRANGER ( ch_vcf, ch_variant_catalog ) + ch_versions = ch_versions.mix(STRANGER.out.versions) + + COMPRESS_STRANGER ( + STRANGER.out.vcf.map { meta, vcf -> [meta, vcf, [] ] }, + [], [], [] + ) + ch_versions = ch_versions.mix(COMPRESS_STRANGER.out.versions) + + ch_vcf_idx = COMPRESS_STRANGER.out.vcf.join(COMPRESS_STRANGER.out.tbi, failOnMismatch:true, failOnDuplicate:true) + + emit: + vcf_idx = ch_vcf_idx // channel: [ val(meta), path(vcf), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/bam_infer_sex.nf b/subworkflows/local/bam_infer_sex.nf index 3f49e5e5..16486ea8 100644 --- a/subworkflows/local/bam_infer_sex.nf +++ b/subworkflows/local/bam_infer_sex.nf @@ -1,5 +1,6 
@@ -include { SOMALIER_EXTRACT } from '../../modules/nf-core/somalier/extract/main' -include { SOMALIER_RELATE } from '../../modules/nf-core/somalier/relate/main' +include { SOMALIER_EXTRACT } from '../../modules/nf-core/somalier/extract/main' +include { SOMALIER_RELATE as RELATE_INFER } from '../../modules/nf-core/somalier/relate/main' +include { SOMALIER_RELATE as RELATE_RELATE } from '../../modules/nf-core/somalier/relate/main' workflow BAM_INFER_SEX { @@ -11,29 +12,27 @@ workflow BAM_INFER_SEX { ch_ped // channel: [ val(meta), path(ped) ] main: - ch_versions = Channel.empty() // Extract sites SOMALIER_EXTRACT ( ch_bam_bai, - ch_fasta.map { it[1] }, - ch_fai.map { it[1] }, - ch_somalier_sites.map { it[1] } + ch_fasta, + ch_fai, + ch_somalier_sites ) ch_versions = ch_versions.mix(SOMALIER_EXTRACT.out.versions) SOMALIER_EXTRACT.out.extract - .map { meta, extract -> [ [ id: 'multisample' ], extract ] } - .groupTuple() - .join( ch_ped.map { ped -> [ [ id:'multisample'], ped ] } ) - .set { ch_somalier_relate_in } + .combine( ch_ped.map { meta, ped -> ped } ) + .filter { meta, extract, ped -> meta.sex == 0 } + .set { ch_relate_infer_in } - // Infer sex - SOMALIER_RELATE ( ch_somalier_relate_in, [] ) - ch_versions = ch_versions.mix(SOMALIER_RELATE.out.versions) + // 1. 
Run somalier relate on one sample at a time to infer sex + RELATE_INFER ( ch_relate_infer_in, [] ) + ch_versions = ch_versions.mix(RELATE_INFER.out.versions) - SOMALIER_RELATE.out.samples_tsv + RELATE_INFER.out.samples_tsv .map { meta, tsv -> tsv } .splitCsv(header: true, sep: '\t') .set { somalier_tsv } @@ -47,30 +46,43 @@ workflow BAM_INFER_SEX { } .set { ch_somalier_sex } - // Use sex from somalier for samples with unknown sex (sex == 0) in samplesheet + // Branch on samples with known/unknown sex ch_bam_bai + .branch { meta, bam, bai -> + unknown_sex: meta.sex == 0 + known_sex: meta.sex != 0 + } + .set { ch_samples } + + // Update sex with sex from somalier for samples with unknown sex + ch_samples.unknown_sex .map { meta, bam, bai -> [ meta.id, meta, bam, bai ] } .join( ch_somalier_sex ) .map { id, meta, bam, bai, somalier -> - new_meta = [ - id : meta.id, - family_id : meta.family_id, - paternal_id : meta.paternal_id, - maternal_id : meta.maternal_id, - sex : meta.sex == 0 ? somalier.sex.toInteger() : meta.sex, - phenotype : meta.phenotype, - single_end : meta.single_end - ] - [ new_meta, bam, bai ] + updated_sex = (meta.sex == 0 ? somalier.sex.toInteger() : meta.sex) + [ meta + [sex: updated_sex], bam, bai ] } .set { ch_updated_sex } + // Add samples with known sex + ch_updated_sex = ch_updated_sex.mix(ch_samples.known_sex) + + // 2. 
Run relate on all samples at once to check relatedness + SOMALIER_EXTRACT.out.extract + .map { meta, extract -> [ [ id: meta.project ], extract ] } + .groupTuple() + .join( ch_ped ) + .set { ch_relate_relate_in } + + RELATE_RELATE ( ch_relate_relate_in, [] ) + ch_versions = ch_versions.mix(RELATE_RELATE.out.versions) + emit: bam = ch_updated_sex.map { meta, bam, bai -> [ meta, bam ] } // channel: [ val(meta), path(bam) ] bai = ch_updated_sex.map { meta, bam, bai -> [ meta, bai ] } // channel: [ val(meta), path(bai) ] bam_bai = ch_updated_sex // channel: [ val(meta), path(bam), path(bai) ] - somalier_samples = SOMALIER_RELATE.out.samples_tsv // channel: [ val(meta), path(samples_tsv) ] - somalier_pairs = SOMALIER_RELATE.out.pairs_tsv // channel: [ val(meta), path(pairs_tsv) ] + somalier_samples = RELATE_RELATE.out.samples_tsv // channel: [ val(meta), path(samples_tsv) ] + somalier_pairs = RELATE_RELATE.out.pairs_tsv // channel: [ val(meta), path(pairs_tsv) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/bam_to_fastq.nf b/subworkflows/local/bam_to_fastq.nf deleted file mode 100644 index 0a7bf330..00000000 --- a/subworkflows/local/bam_to_fastq.nf +++ /dev/null @@ -1,32 +0,0 @@ -include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main' - -workflow BAM_TO_FASTQ { - - take: - ch_sample // channel: [ val(meta), reads ] - - main: - ch_versions = Channel.empty() - - // Filter out BAM from fastq - ch_sample - .map { meta, fastq -> [ meta + [ 'single_end': true ], fastq ] } - .branch { meta, reads -> - fastq: reads.extension == 'gz' - bam: reads.extension == 'bam' - } - .set { ch_filetypes } - - ch_filetypes.fastq.set { ch_sample } - - SAMTOOLS_FASTQ ( ch_filetypes.bam, false ) - ch_versions = ch_versions.mix(SAMTOOLS_FASTQ.out.versions) - - // Mix converted BAM back in - ch_sample = ch_sample.mix(SAMTOOLS_FASTQ.out.other) - - emit: - fastq = ch_sample // channel: [ val(meta), fastq ] - versions = ch_versions // 
channel: [ versions.yml ] -} - diff --git a/subworkflows/local/call_repeat_expansions/main.nf b/subworkflows/local/call_repeat_expansions/main.nf new file mode 100644 index 00000000..8b71453e --- /dev/null +++ b/subworkflows/local/call_repeat_expansions/main.nf @@ -0,0 +1,55 @@ +include { TRGT } from '../../../modules/local/trgt' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_TRGT } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_TRGT } from '../../../modules/nf-core/samtools/sort/main' +include { BCFTOOLS_SORT as BCFTOOLS_SORT_TRGT } from '../../../modules/nf-core/bcftools/sort/main' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_MERGE } from '../../../modules/nf-core/bcftools/index/main' +include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge/main' + +workflow CALL_REPEAT_EXPANSIONS { + + take: + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_trgt_bed // channel: [mandatory] [ val(meta), path(bed) ] + + main: + ch_repeat_calls_vcf = Channel.empty() + ch_versions = Channel.empty() + + ch_bam_bai + .map { meta, bam, bai -> [meta, bam, bai, meta.sex] } + .set { ch_trgt_input } + + // Run TRGT + TRGT ( ch_trgt_input, ch_fasta, ch_fai, ch_trgt_bed.map { it[1] } ) + + // Sort and index bam + SAMTOOLS_SORT_TRGT ( TRGT.out.bam, [[],[]] ) + SAMTOOLS_INDEX_TRGT(SAMTOOLS_SORT_TRGT.out.bam) + + // Sort and index bcf + BCFTOOLS_SORT_TRGT(TRGT.out.vcf) + + BCFTOOLS_SORT_TRGT.out.vcf + .join( BCFTOOLS_SORT_TRGT.out.tbi ) + .map { meta, bcf, csi -> [ [ id : meta.project ], bcf, csi ] } + .groupTuple() + .set{ ch_bcftools_merge_in } + + BCFTOOLS_MERGE ( ch_bcftools_merge_in, ch_fasta, ch_fai, [] ) + + BCFTOOLS_INDEX_MERGE ( BCFTOOLS_MERGE.out.merged_variants ) + + ch_versions = ch_versions.mix(TRGT.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_TRGT.out.versions)
+ ch_versions = ch_versions.mix(SAMTOOLS_INDEX_TRGT.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_SORT_TRGT.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_MERGE.out.versions) + + emit: + vcf = BCFTOOLS_SORT_TRGT.out.vcf // channel: [ val(meta), path(vcf) ] + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/local/convert_input_files.nf b/subworkflows/local/convert_input_files.nf new file mode 100644 index 00000000..5c44e521 --- /dev/null +++ b/subworkflows/local/convert_input_files.nf @@ -0,0 +1,37 @@ +include { SAMTOOLS_IMPORT } from '../../modules/nf-core/samtools/import/main' +include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main' + +workflow CONVERT_INPUT_FILES { + + take: + ch_sample // channel: [ val(meta), reads ] + + main: + ch_versions = Channel.empty() + + ch_sample + .branch { meta, reads -> + fastq: reads.extension == 'gz' + bam: reads.extension == 'bam' + } + .set { ch_filetypes } + + ch_bam = ch_filetypes.bam + ch_fastq = ch_filetypes.fastq + + SAMTOOLS_FASTQ ( ch_filetypes.bam, false ) + ch_versions = ch_versions.mix(SAMTOOLS_FASTQ.out.versions) + + SAMTOOLS_IMPORT ( ch_filetypes.fastq ) + ch_versions = ch_versions.mix(SAMTOOLS_IMPORT.out.versions) + + // Mix converted files back in + ch_bam = ch_bam.mix(SAMTOOLS_IMPORT.out.bam) + ch_fastq = ch_fastq.mix(SAMTOOLS_FASTQ.out.other) + + emit: + bam = ch_bam // channel: [ val(meta), bam ] + fastq = ch_fastq // channel: [ val(meta), fastq ] + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/local/genome_assembly.nf b/subworkflows/local/genome_assembly.nf index 7764d15f..650af99f 100644 --- a/subworkflows/local/genome_assembly.nf +++ b/subworkflows/local/genome_assembly.nf @@ -27,7 +27,7 @@ workflow ASSEMBLY { ch_versions = ch_versions.mix(HIFIASM.out.versions) } else if(params.hifiasm_mode == 'trio-binning') { - // TODO: Multiple trios 
with different parents may not work? + // Multiple trios with different parents may not work? ch_reads.groupTuple() .map{ meta, reads -> meta } // Takes meta, then // combine to create all possible combinations of [ meta, meta ] diff --git a/subworkflows/local/methylation.nf b/subworkflows/local/methylation.nf index 8e7eea7d..6541a9f4 100644 --- a/subworkflows/local/methylation.nf +++ b/subworkflows/local/methylation.nf @@ -1,7 +1,7 @@ -include { MODKIT_PILEUP } from '../../modules/nf-core/modkit/pileup/main' -include { MODKIT_PILEUP as MODKIT_PILEUP_HAPLOTYPES } from '../../modules/nf-core/modkit/pileup/main' -include { TABIX_BGZIPTABIX as BGZIP_MODKIT_PILEUP } from '../../modules/nf-core/tabix/bgziptabix/main' -include { TABIX_BGZIPTABIX as BGZIP_MODKIT_PILEUP_HAPLOTYPES } from '../../modules/nf-core/tabix/bgziptabix/main' +include { MODKIT_PILEUP as MODKIT_PILEUP_UNPHASED } from '../../modules/nf-core/modkit/pileup/main' +include { MODKIT_PILEUP as MODKIT_PILEUP_PHASED } from '../../modules/nf-core/modkit/pileup/main' +include { TABIX_BGZIPTABIX as BGZIP_MODKIT_PILEUP_UNPHASED } from '../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as BGZIP_MODKIT_PILEUP_PHASED } from '../../modules/nf-core/tabix/bgziptabix/main' workflow METHYLATION { @@ -15,22 +15,22 @@ workflow METHYLATION { ch_versions = Channel.empty() // Run modkit pileup once without dividing by HP-tag and once with - MODKIT_PILEUP(ch_haplotagged_bam_bai, ch_fasta, ch_bed) - ch_versions = ch_versions.mix(MODKIT_PILEUP.out.versions) + MODKIT_PILEUP_UNPHASED (ch_haplotagged_bam_bai, ch_fasta, ch_bed) + ch_versions = ch_versions.mix(MODKIT_PILEUP_UNPHASED.out.versions) - MODKIT_PILEUP_HAPLOTYPES(ch_haplotagged_bam_bai, ch_fasta, ch_bed) - ch_versions = ch_versions.mix(MODKIT_PILEUP_HAPLOTYPES.out.versions) + MODKIT_PILEUP_PHASED (ch_haplotagged_bam_bai, ch_fasta, ch_bed) + ch_versions = ch_versions.mix(MODKIT_PILEUP_PHASED.out.versions) // Bgzip and index output "BED" - 
BGZIP_MODKIT_PILEUP ( MODKIT_PILEUP.out.bed ) - ch_versions = ch_versions.mix(BGZIP_MODKIT_PILEUP.out.versions) + BGZIP_MODKIT_PILEUP_UNPHASED ( MODKIT_PILEUP_UNPHASED.out.bed ) + ch_versions = ch_versions.mix(BGZIP_MODKIT_PILEUP_UNPHASED.out.versions) - MODKIT_PILEUP_HAPLOTYPES.out.bed + MODKIT_PILEUP_PHASED.out.bed .transpose() - .set { ch_bgzip_modkit_haplotypes_in } + .set { ch_bgzip_modkit_pileup_phased_in } - BGZIP_MODKIT_PILEUP_HAPLOTYPES ( ch_bgzip_modkit_haplotypes_in ) - ch_versions = ch_versions.mix(BGZIP_MODKIT_PILEUP_HAPLOTYPES.out.versions) + BGZIP_MODKIT_PILEUP_PHASED ( ch_bgzip_modkit_pileup_phased_in ) + ch_versions = ch_versions.mix(BGZIP_MODKIT_PILEUP_PHASED.out.versions) emit: versions = ch_versions // channel: [ versions.yml ] diff --git a/subworkflows/local/phasing.nf b/subworkflows/local/phasing.nf index 19455fcb..fb87a27b 100644 --- a/subworkflows/local/phasing.nf +++ b/subworkflows/local/phasing.nf @@ -19,8 +19,9 @@ workflow PHASING { fai // channel: [ val(meta), fai ] main: - ch_versions = Channel.empty() + ch_versions = Channel.empty() ch_bam_bai_haplotagged = Channel.empty() + ch_vcf_index = Channel.empty() TABIX_TABIX(ch_vcf) ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) @@ -30,9 +31,6 @@ workflow PHASING { WHATSHAP_PHASE( ch_vcf.join(ch_bam_bai), fasta, fai ) ch_versions = ch_versions.mix(WHATSHAP_PHASE.out.versions) - WHATSHAP_STATS( WHATSHAP_PHASE.out.vcf_tbi ) - ch_versions = ch_versions.mix(WHATSHAP_STATS.out.versions) - WHATSHAP_PHASE.out.vcf_tbi .join(ch_bam_bai) .set { ch_whatshap_haplotag_in } @@ -48,10 +46,11 @@ workflow PHASING { .join(SAMTOOLS_INDEX_WHATSHAP.out.bai) .set { ch_bam_bai_haplotagged } - } else if (params.phaser.equals("hiphase_snv")) { + ch_vcf_index = ch_vcf_index.mix( WHATSHAP_PHASE.out.vcf_tbi ) + } else if (params.phaser.equals("hiphase_snv")) { ch_vcf - .join(TABIX_TABIX.out.tbi) + .join(TABIX_TABIX.out.csi) .join(ch_bam_bai) .set { ch_hiphase_snv_in } @@ -62,6 +61,8 @@ workflow PHASING { 
.join(HIPHASE_SNV.out.bais) .set { ch_bam_bai_haplotagged } + ch_vcf_index = ch_vcf_index.mix( HIPHASE_SNV.out.vcfs.join(HIPHASE_SNV.out.vcfs_tbi) ) + } else if (params.phaser.equals("hiphase_sv")) { // Sniffles specific... BCFTOOLS_REHEADER( @@ -92,7 +93,7 @@ workflow PHASING { .groupTuple() .set { ch_hiphase_vcf } - TABIX_TABIX.out.tbi + TABIX_TABIX.out.csi .concat(ch_sv_tbi) .groupTuple() .set { ch_hiphase_tbi } @@ -108,12 +109,18 @@ workflow PHASING { HIPHASE_SV.out.bams .join(HIPHASE_SV.out.bais) .set { ch_bam_bai_haplotagged } + + ch_vcf_index = ch_vcf_index.mix( HIPHASE_SV.out.vcfs.join(HIPHASE_SV.out.vcfs_tbi) ) } + WHATSHAP_STATS( ch_vcf_index ) + ch_versions = ch_versions.mix(WHATSHAP_STATS.out.versions) + CRAMINO_PHASED( ch_bam_bai_haplotagged ) ch_versions = ch_versions.mix(CRAMINO_PHASED.out.versions) emit: - haplotagged_bam_bai = ch_bam_bai_haplotagged // channel: [ val(meta), bam, bai ] - versions = ch_versions // channel: [ versions.yml ] + haplotagged_bam_bai = ch_bam_bai_haplotagged // channel: [ val(meta), bam, bai ] + stats = WHATSHAP_STATS.out.stats // channel: [ val(meta), txt ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 07117db5..1babff3b 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -1,13 +1,13 @@ -include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' -include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main' include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main' +include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' include { UNTAR as UNTAR_VEP_CACHE } from '../../modules/nf-core/untar/main' workflow PREPARE_GENOME { take: - fasta_in // channel: [ val(meta), fasta ] - ch_vep_cache // channel: [mandatory for annotation] [ 
path(cache) ] + fasta_in // channel: [mandatory] [ val(meta), path(fasta) ] + ch_vep_cache // channel: [optional] [ path(cache) ] main: ch_versions = Channel.empty() @@ -29,24 +29,23 @@ workflow PREPARE_GENOME { } SAMTOOLS_FAIDX ( ch_fasta, [[],[]] ) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + MINIMAP2_INDEX ( ch_fasta ) + ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions) UNTAR_VEP_CACHE (ch_vep_cache) + ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) UNTAR_VEP_CACHE.out.untar .map { meta, files -> [files] } .collect() .set { untarred_vep } - // Gather versions - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) - ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions.first()) - ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) - emit: - fasta = ch_fasta // channel: [ val(meta), fasta ] - fai = SAMTOOLS_FAIDX.out.fai.collect() // channel: [ val(meta), fai ] + mmi = MINIMAP2_INDEX.out.index.collect() // channel: [ val(meta), path(mmi) ] + fai = SAMTOOLS_FAIDX.out.fai.collect() // channel: [ val(meta), path(fai) ] + fasta = ch_fasta // channel: [ val(meta), path(fasta) ] vep_resources = untarred_vep // channel: [ path(cache) ] - mmi = MINIMAP2_INDEX.out.index.collect() // channel: [ val(meta), mmi ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/qc_aligned_reads.nf b/subworkflows/local/qc_aligned_reads.nf index c7eba920..2176f807 100644 --- a/subworkflows/local/qc_aligned_reads.nf +++ b/subworkflows/local/qc_aligned_reads.nf @@ -11,8 +11,6 @@ workflow QC_ALIGNED_READS { main: ch_versions = Channel.empty() - // Prepare inputs - CRAMINO (ch_bam_bai) ch_versions = ch_versions.mix(CRAMINO.out.versions) @@ -24,6 +22,9 @@ workflow QC_ALIGNED_READS { ch_versions = ch_versions.mix(MOSDEPTH.out.versions) emit: - versions = ch_versions // channel: [ versions.yml ] + mosdepth_summary = MOSDEPTH.out.summary_txt // channel: [ val(meta), path(txt) ] + mosdepth_global_dist = 
MOSDEPTH.out.global_txt // channel: [ val(meta), path(txt) ] + mosdepth_region_dist = MOSDEPTH.out.regions_txt // channel: [ val(meta), path(txt) ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/rank_variants/main.nf b/subworkflows/local/rank_variants/main.nf new file mode 100644 index 00000000..c6820e14 --- /dev/null +++ b/subworkflows/local/rank_variants/main.nf @@ -0,0 +1,52 @@ +// +// A subworkflow to score and rank variants. +// + +include { GENMOD_ANNOTATE } from '../../../modules/nf-core/genmod/annotate/main' +include { GENMOD_MODELS } from '../../../modules/nf-core/genmod/models/main' +include { GENMOD_SCORE } from '../../../modules/nf-core/genmod/score/main' +include { GENMOD_COMPOUND } from '../../../modules/nf-core/genmod/compound/main' +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main' +include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' + +workflow RANK_VARIANTS { + + take: + ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_pedfile // channel: [mandatory] [ path(ped) ] + ch_reduced_penetrance // channel: [mandatory] [ path(penetrance) ] + ch_score_config // channel: [mandatory] [ path(ini) ] + + main: + ch_versions = Channel.empty() + + GENMOD_ANNOTATE(ch_vcf) + + GENMOD_MODELS(GENMOD_ANNOTATE.out.vcf, ch_pedfile, ch_reduced_penetrance) + + GENMOD_SCORE(GENMOD_MODELS.out.vcf, ch_pedfile, ch_score_config) + + GENMOD_COMPOUND(GENMOD_SCORE.out.vcf) + + BCFTOOLS_SORT(GENMOD_COMPOUND.out.vcf) // SV file needs to be sorted before indexing + + TABIX_BGZIP(GENMOD_COMPOUND.out.vcf) //run only for SNVs + + ch_vcf = TABIX_BGZIP.out.output.mix(BCFTOOLS_SORT.out.vcf) + + TABIX_TABIX (ch_vcf) + + ch_versions = ch_versions.mix(GENMOD_ANNOTATE.out.versions) + ch_versions = ch_versions.mix(GENMOD_MODELS.out.versions) + ch_versions = ch_versions.mix(GENMOD_SCORE.out.versions) + ch_versions =
ch_versions.mix(GENMOD_COMPOUND.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) + ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + + emit: + vcf = ch_vcf // channel: [ val(meta), path(vcf) ] + tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/repeat_analysis.nf b/subworkflows/local/repeat_analysis.nf deleted file mode 100644 index b6b0b79e..00000000 --- a/subworkflows/local/repeat_analysis.nf +++ /dev/null @@ -1,57 +0,0 @@ -include { TRGT } from '../../modules/local/trgt' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_TRGT } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_TRGT } from '../../modules/nf-core/samtools/sort/main' -include { BCFTOOLS_SORT as BCFTOOLS_SORT_TRGT } from '../../modules/nf-core/bcftools/sort/main' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_MERGE } from '../../modules/nf-core/bcftools/index/main' -include { BCFTOOLS_MERGE } from '../../modules/nf-core/bcftools/merge/main' - -workflow REPEAT_ANALYSIS { - - take: - ch_bam_bai - ch_fasta - ch_fai - ch_trgt_bed - - main: - ch_repeat_calls_vcf = Channel.empty() - ch_versions = Channel.empty() - - ch_bam_bai - .map{ meta, bam, bai -> [meta, bam, bai, meta.sex] } - .set{ ch_trgt_input } - - // Run TGRT - TRGT ( ch_trgt_input, ch_fasta, ch_trgt_bed ) - - // Sort and index bam - SAMTOOLS_SORT_TRGT ( TRGT.out.bam, [[],[]] ) - SAMTOOLS_INDEX_TRGT(SAMTOOLS_SORT_TRGT.out.bam) - - // Sort and index bcf - BCFTOOLS_SORT_TRGT(TRGT.out.vcf) - - BCFTOOLS_SORT_TRGT.out.vcf - .join( BCFTOOLS_SORT_TRGT.out.tbi ) - .toList() - .filter { it.size() > 1 } - .flatMap() - .map { meta, bcf, csi -> [ [ id : 'multisample' ], bcf, csi ] } - .groupTuple() - .set{ ch_bcftools_merge_in } - - BCFTOOLS_MERGE ( ch_bcftools_merge_in, ch_fasta, ch_fai, [] ) - - BCFTOOLS_INDEX_MERGE ( 
BCFTOOLS_MERGE.out.merged_variants ) - - ch_versions = ch_versions.mix(TRGT.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_SORT_TRGT.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_TRGT.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_SORT_TRGT.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_INDEX_MERGE.out.versions) - - emit: - versions = ch_versions // channel: [ versions.yml ] -} - diff --git a/subworkflows/local/scatter_genome/main.nf b/subworkflows/local/scatter_genome/main.nf new file mode 100644 index 00000000..5e611c29 --- /dev/null +++ b/subworkflows/local/scatter_genome/main.nf @@ -0,0 +1,65 @@ +include { BEDTOOLS_MERGE } from '../../../modules/nf-core/bedtools/merge/main' +include { BEDTOOLS_SORT } from '../../../modules/nf-core/bedtools/sort/main' +include { BUILD_INTERVALS } from '../../../modules/local/build_intervals/main' +include { SPLIT_BED_CHUNKS } from '../../../modules/local/split_bed_chunks/main' + +workflow SCATTER_GENOME { + + take: + ch_fai // channel: [optional] [ val(meta), path(fai) ] + ch_input_bed // channel: [optional] [ val(meta), path(bed) ] + make_bed_from_fai // bool + make_bed_intervals // bool + split_n // integer: split bed into n regions + + main: + ch_versions = Channel.empty() + ch_bed = Channel.empty() + ch_bed_intervals = Channel.empty() + + // + // If no BED-file is provided then build intervals from reference + // + if( make_bed_from_fai ) { + + BUILD_INTERVALS ( ch_fai.map { name, fai -> [ [ id: name ], fai ] } ) + ch_versions = ch_versions.mix(BUILD_INTERVALS.out.versions) + + BUILD_INTERVALS.out.bed + .set{ ch_bed } + } else { + ch_input_bed + .set{ ch_bed } + } + + // + // Merge overlapping and then split BED regions for SNV calling + // + if( make_bed_intervals ) { + + if( split_n < 1 ) { error "Can't split bed file into less than one file" } + + // Sort and merge overlapping regions + BEDTOOLS_SORT ( ch_bed, [] ) + 
ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) + + BEDTOOLS_MERGE ( BEDTOOLS_SORT.out.sorted ) + ch_versions = ch_versions.mix(BEDTOOLS_MERGE.out.versions) + + SPLIT_BED_CHUNKS( BEDTOOLS_MERGE.out.bed, split_n ) + ch_versions = ch_versions.mix(SPLIT_BED_CHUNKS.out.versions) + + // Create a channel with the bed file and the total number of intervals (for groupKey) + SPLIT_BED_CHUNKS.out.split_beds + .collect() + .map{ it -> [ it, it.size() ] } + .transpose() + .set { ch_bed_intervals } + } + + emit: + bed = ch_bed // channel: [ val(meta), path(bed) ] + bed_intervals = ch_bed_intervals // channel: [ path(bed), val(num_intervals) ] + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/local/scatter_genome/tests/main.nf.test b/subworkflows/local/scatter_genome/tests/main.nf.test new file mode 100644 index 00000000..48b3265c --- /dev/null +++ b/subworkflows/local/scatter_genome/tests/main.nf.test @@ -0,0 +1,188 @@ +nextflow_workflow { + + name "Test Workflow SCATTER_GENOME" + script "../main.nf" + workflow "SCATTER_GENOME" + config "./nextflow.config" + + setup { + run("SAMTOOLS_FAIDX") { + script "../../../../modules/nf-core/samtools/faidx/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + } + + test("fai, [], true, false, []") { + + when { + workflow { + """ + input[0] = SAMTOOLS_FAIDX.out.fai + input[1] = [[],[]] + input[2] = true + input[3] = false + input[4] = [] + """ + } + } + + then { + assertAll ( + { assert workflow.success }, + { assert snapshot( + workflow.out, + path(workflow.out.bed.get(0).get(1)).readLines(), + workflow.out.bed_intervals.findAll { it[0] instanceof String }.collect { path(it[0]).readLines() }, + ).match() } + ) + } + + } + + test("fai, [], true, true, 3") { + + when { + workflow { + """ + input[0] = SAMTOOLS_FAIDX.out.fai + input[1] = [[],[]] 
+ input[2] = true + input[3] = true + input[4] = 3 + """ + } + } + + then { + assertAll ( + { assert workflow.success }, + { assert snapshot( + workflow.out, + path(workflow.out.bed.get(0).get(1)).readLines(), + workflow.out.bed_intervals.findAll { it[0] instanceof String }.collect { path(it[0]).readLines() }, + ).match() } + ) + } + + } + + test("fai, [], bed, true, 3") { + + when { + workflow { + """ + input[0] = SAMTOOLS_FAIDX.out.fai + input[1] = [[],[]] + input[2] = Channel.of('chr22\t0\t1000') + .collectFile(name: 'chr22.bed', newLine: true) + .map { file -> [ [ id:'chr22' ], file ] } + input[3] = true + input[4] = 3 + """ + } + } + + then { + assertAll ( + { assert workflow.success }, + { assert snapshot( + workflow.out, + path(workflow.out.bed.get(0).get(1)).readLines(), + workflow.out.bed_intervals.findAll { it[0] instanceof String }.collect { path(it[0]).readLines() }, + ).match() } + ) + } + + } + + test("[], bed, false, false, 3") { + + when { + workflow { + """ + input[0] = [[],[]] + input[1] = Channel.of('chr22\t0\t1000') + .collectFile(name: 'chr22.bed', newLine: true) + .map { file -> [ [ id:'chr22' ], file ] } + input[2] = false + input[3] = false + input[4] = 3 + """ + } + } + + then { + assertAll ( + { assert workflow.success }, + { assert snapshot( + workflow.out, + path(workflow.out.bed.get(0).get(1)).readLines(), + workflow.out.bed_intervals.findAll { it[0] instanceof String }.collect { path(it[0]).readLines() }, + ).match() } + ) + } + + } + + test("[], bed, false, true, 3") { + + when { + workflow { + """ + input[0] = [[],[]] + input[1] = Channel.of('''chr22\t0\t1000\nchr22\t500\t1500\nchr22\t2000\t3000''') + .collectFile(name: 'chr22.bed', newLine: true) + .map { file -> [ [ id:'chr22' ], file ] } + input[2] = false + input[3] = true + input[4] = 3 + """ + } + } + + then { + assertAll ( + { assert workflow.success }, + { assert snapshot( + workflow.out, + path(workflow.out.bed.get(0).get(1)).readLines(), + workflow.out.bed_intervals.findAll 
{ it[0] instanceof String }.collect { path(it[0]).readLines() }, + ).match() } + ) + } + + } + + test("[], bed, false, true, 0 - should fail") { + + when { + workflow { + """ + input[0] = [[],[]] + input[1] = Channel.of('''chr22\t0\t1000\nchr22\t500\t1500\nchr22\t2000\t3000''') + .collectFile(name: 'chr22.bed', newLine: true) + .map { file -> [ [ id:'chr22' ], file ] } + input[2] = false + input[3] = true + input[4] = 0 + """ + } + } + + then { + assertAll ( + { assert workflow.failed }, + ) + } + + } + +} diff --git a/subworkflows/local/scatter_genome/tests/main.nf.test.snap b/subworkflows/local/scatter_genome/tests/main.nf.test.snap new file mode 100644 index 00000000..163401f0 --- /dev/null +++ b/subworkflows/local/scatter_genome/tests/main.nf.test.snap @@ -0,0 +1,280 @@ +{ + "fai, [], bed, true, 3": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + [ + "1.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + 1 + ] + ], + "2": [ + "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9", + "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be", + "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240", + "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1" + ], + "bed": [ + [ + { + "id": "test" + }, + "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "bed_intervals": [ + [ + "1.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + 1 + ] + ], + "versions": [ + "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9", + "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be", + "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240", + "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1" + ] + }, + [ + "chr22\t0\t40001" + ], + [ + [ + "chr22\t0\t40001" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T13:03:46.414904212" + }, + "[], bed, false, false, 3": { + "content": [ + { + "0": [ + [ + { + "id": "chr22" + }, + 
"chr22.bed:md5,3b0b598acca89a84aa414e4c95abec1f" + ] + ], + "1": [ + + ], + "2": [ + + ], + "bed": [ + [ + { + "id": "chr22" + }, + "chr22.bed:md5,3b0b598acca89a84aa414e4c95abec1f" + ] + ], + "bed_intervals": [ + + ], + "versions": [ + + ] + }, + [ + "chr22\t0\t1000" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T12:53:03.541050122" + }, + "[], bed, false, true, 3": { + "content": [ + { + "0": [ + [ + { + "id": "chr22" + }, + "chr22.bed:md5,88ebd3fd77139b3d33af226231eff0df" + ] + ], + "1": [ + [ + "1.bed:md5,37978db9b095a29ebbd64f65dd8f13b3", + 2 + ], + [ + "2.bed:md5,0da4774e61b9c9005122e46f24522604", + 2 + ] + ], + "2": [ + "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9", + "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be", + "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240" + ], + "bed": [ + [ + { + "id": "chr22" + }, + "chr22.bed:md5,88ebd3fd77139b3d33af226231eff0df" + ] + ], + "bed_intervals": [ + [ + "1.bed:md5,37978db9b095a29ebbd64f65dd8f13b3", + 2 + ], + [ + "2.bed:md5,0da4774e61b9c9005122e46f24522604", + 2 + ] + ], + "versions": [ + "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9", + "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be", + "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240" + ] + }, + [ + "chr22\t0\t1000", + "chr22\t500\t1500", + "chr22\t2000\t3000" + ], + [ + [ + "chr22\t0\t1500" + ], + [ + "chr22\t2000\t3000" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T13:06:34.382235567" + }, + "fai, [], true, true, 3": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + [ + "1.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + 1 + ] + ], + "2": [ + "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9", + "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be", + "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240", + 
"versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1" + ], + "bed": [ + [ + { + "id": "test" + }, + "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "bed_intervals": [ + [ + "1.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + 1 + ] + ], + "versions": [ + "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9", + "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be", + "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240", + "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1" + ] + }, + [ + "chr22\t0\t40001" + ], + [ + [ + "chr22\t0\t40001" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T13:03:35.848426979" + }, + "fai, [], true, false, []": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1" + ], + "bed": [ + [ + { + "id": "test" + }, + "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "bed_intervals": [ + + ], + "versions": [ + "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1" + ] + }, + [ + "chr22\t0\t40001" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T13:03:25.327078297" + } +} \ No newline at end of file diff --git a/subworkflows/local/scatter_genome/tests/nextflow.config b/subworkflows/local/scatter_genome/tests/nextflow.config new file mode 100644 index 00000000..2efae8e4 --- /dev/null +++ b/subworkflows/local/scatter_genome/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: 'BEDTOOLS_MERGE' { + ext.prefix = { "${meta.id}_merged" } + } + withName: 'BEDTOOLS_SORT' { + ext.prefix = { "${meta.id}_sorted" } + } +} diff --git a/subworkflows/local/short_variant_calling.nf b/subworkflows/local/short_variant_calling.nf deleted file mode 100644 index 9b050f0c..00000000 --- a/subworkflows/local/short_variant_calling.nf +++ /dev/null @@ -1,108 +0,0 @@ -include { 
DEEPVARIANT } from '../../modules/nf-core/deepvariant' -include { GLNEXUS } from '../../modules/nf-core/glnexus' -include { BCFTOOLS_VIEW_REGIONS } from '../../modules/local/bcftools/view_regions' -include { TABIX_TABIX as TABIX_EXTRA_GVCFS } from '../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_DV } from '../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_DV_VCF } from '../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_GLNEXUS } from '../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_DV } from '../../modules/nf-core/bcftools/concat/main' -include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_DV_VCF } from '../../modules/nf-core/bcftools/concat/main' -include { BCFTOOLS_SORT as BCFTOOLS_SORT_DV } from '../../modules/nf-core/bcftools/sort/main' -include { BCFTOOLS_SORT as BCFTOOLS_SORT_DV_VCF } from '../../modules/nf-core/bcftools/sort/main' - -workflow SHORT_VARIANT_CALLING { - - take: - ch_bam_bai - ch_extra_gvcfs - ch_fasta - ch_fai - ch_bed - - main: - ch_snp_calls_vcf = Channel.empty() - ch_snp_calls_gvcf = Channel.empty() - ch_combined_bcf = Channel.empty() - ch_versions = Channel.empty() - - // Does splitting BAMs and copying to node make sense to reduce IO? 
- - // Only one of these is run depending on params.variant_caller (when clause condition is defined in the conf/modules.config) - DEEPVARIANT ( ch_bam_bai, ch_fasta, ch_fai, [[],[]] ) - - // Collect VCFs - ch_snp_calls_vcf = ch_snp_calls_vcf.mix(DEEPVARIANT.out.vcf) - - // Collect GVCFs - ch_snp_calls_gvcf = ch_snp_calls_gvcf.mix(DEEPVARIANT.out.gvcf) - - // Extra gVCFs - TABIX_EXTRA_GVCFS(ch_extra_gvcfs) - - ch_extra_gvcfs - .join(TABIX_EXTRA_GVCFS.out.tbi) - .groupTuple() - .set{ ch_bcftools_view_regions_in } - - // This cuts all regions in BED file from extra gVCFS, better than nothing - BCFTOOLS_VIEW_REGIONS( ch_bcftools_view_regions_in, ch_bed ) - - // DV gVCFs - TABIX_DV(ch_snp_calls_gvcf) - - ch_snp_calls_gvcf - .groupTuple() // size not working here if there are less than specifed regions.. - .join(TABIX_DV.out.tbi.groupTuple()) - .set{ bcftools_concat_dv_in } - - - // Concat into one gVCF per sample & sort - BCFTOOLS_CONCAT_DV ( bcftools_concat_dv_in ) - ch_versions = ch_versions.mix(BCFTOOLS_CONCAT_DV.out.versions) - - BCFTOOLS_SORT_DV ( BCFTOOLS_CONCAT_DV.out.vcf ) - ch_versions = ch_versions.mix(BCFTOOLS_SORT_DV.out.versions) - - // DV VCFs - TABIX_DV_VCF(ch_snp_calls_vcf) - ch_versions = ch_versions.mix(TABIX_DV_VCF.out.versions) - - ch_snp_calls_vcf - .groupTuple() // size not working here if there are less than specifed regions.. 
- .join(TABIX_DV_VCF.out.tbi.groupTuple()) - .set{ bcftools_concat_dv_vcf_in } - - - // Concat into one VCF per sample & sort - BCFTOOLS_CONCAT_DV_VCF ( bcftools_concat_dv_vcf_in ) - ch_versions = ch_versions.mix(BCFTOOLS_CONCAT_DV_VCF.out.versions) - - BCFTOOLS_SORT_DV_VCF ( BCFTOOLS_CONCAT_DV_VCF.out.vcf ) - ch_versions = ch_versions.mix(BCFTOOLS_SORT_DV_VCF.out.versions) - - // Put DV and extra gvCFs together -> send to glnexus - BCFTOOLS_SORT_DV.out.vcf - .concat(BCFTOOLS_VIEW_REGIONS.out.vcf) - .map { meta, gvcf -> [ ['id':'multisample'], gvcf ]} - .groupTuple() - .set{ ch_glnexus_in } - - // Multisample - GLNEXUS( ch_glnexus_in, ch_bed ) - TABIX_GLNEXUS(GLNEXUS.out.bcf) - - // Get versions - ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) - ch_versions = ch_versions.mix(GLNEXUS.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_VIEW_REGIONS.out.versions) - ch_versions = ch_versions.mix(TABIX_EXTRA_GVCFS.out.versions) - ch_versions = ch_versions.mix(TABIX_DV.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_CONCAT_DV.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_SORT_DV.out.versions) - ch_versions = ch_versions.mix(TABIX_GLNEXUS.out.versions) - - emit: - snp_calls_vcf = BCFTOOLS_SORT_DV_VCF.out.vcf - combined_bcf = GLNEXUS.out.bcf - versions = ch_versions -} diff --git a/subworkflows/local/short_variant_calling/main.nf b/subworkflows/local/short_variant_calling/main.nf new file mode 100644 index 00000000..5395cd2e --- /dev/null +++ b/subworkflows/local/short_variant_calling/main.nf @@ -0,0 +1,100 @@ +// +// Workflow to call and merge SNVs +// +include { BCFTOOLS_CONCAT } from '../../../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_FILLTAGS } from '../../../modules/local/bcftools/filltags/main' +include { BCFTOOLS_NORM as BCFTOOLS_NORM_MULTISAMPLE } from '../../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_NORM as BCFTOOLS_NORM_SINGLESAMPLE } from '../../../modules/nf-core/bcftools/norm/main' +include { 
DEEPVARIANT } from '../../../modules/nf-core/deepvariant/main' +include { GLNEXUS } from '../../../modules/nf-core/glnexus/main' + +workflow SHORT_VARIANT_CALLING { + + take: + ch_bam_bai_bed // channel: [mandatory] [ val(meta), path(bam), path(bai), path(call_region_bed) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_bed // channel: [optional] [ val(meta), path(input_bed) ] + ch_par_bed // channel: [mandatory] [ val(meta), path(par_bed) ] + + main: + ch_versions = Channel.empty() + + ch_bam_bai_bed + // Add call region to meta so we can group by it later + .map { meta, bam, bai, bed -> + [ meta + [ 'region': bed ], bam, bai, bed ] + } + .set { ch_deepvariant_in } + + DEEPVARIANT ( ch_deepvariant_in, ch_fasta, ch_fai, [[],[]], ch_par_bed ) + ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) + + // First remove region so we can group per sample + // Then after grouping remove num_intervals to match the meta of other workflows + DEEPVARIANT.out.vcf + .map { meta, vcf -> + new_meta = meta - meta.subMap('region') + [ groupKey(new_meta, new_meta.num_intervals ), vcf ] + } + .groupTuple() + .join( DEEPVARIANT.out.vcf_tbi + .map{ meta, tbi -> + new_meta = meta - meta.subMap('region') + [ groupKey(new_meta, new_meta.num_intervals ), tbi ] + } + .groupTuple() + ) + .map { meta, vcf, tbi -> + [ meta - meta.subMap('num_intervals'), vcf, tbi ] + } + .set{ ch_concat_singlesample_in } + + // This creates a singlesample VCF containing ALL regions + BCFTOOLS_CONCAT ( ch_concat_singlesample_in ) + ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) + + // Which is then normalized, and ready to be used + // in processes that require SNVs, but not annotated SNVs + BCFTOOLS_NORM_SINGLESAMPLE ( BCFTOOLS_CONCAT.out.vcf.map { meta, vcf -> [ meta, vcf, [] ] }, ch_fasta ) + ch_versions = ch_versions.mix(BCFTOOLS_NORM_SINGLESAMPLE.out.versions) + + // This creates a multisample VCF, with
regions from ONE bed file + DEEPVARIANT.out.gvcf + .map { meta, gvcf -> + [ meta.region.name, meta.project, meta.phenotype == 2, gvcf ] + } + .groupTuple() // Group all files together per region + // If any of the samples in the VCF have an affected phenotype (2) + // add this to the meta of the multisample VCF to know if we should run RANK_VARIANTS or not + .map { meta, project, affected, gvcfs -> + new_meta = [ + 'id': meta, + 'project': project.first(), // Works only because only one project per run is allowed + 'contains_affected': affected.any(), + ] + [ new_meta, gvcfs ] + } + .set{ glnexus_in } + + GLNEXUS( glnexus_in, ch_bed ) + ch_versions = ch_versions.mix(GLNEXUS.out.versions) + + // Add allele count tag to multisample bcf + BCFTOOLS_FILLTAGS ( GLNEXUS.out.bcf ) + ch_versions = ch_versions.mix(BCFTOOLS_FILLTAGS.out.versions) + + BCFTOOLS_FILLTAGS.out.vcf + .map { meta, vcf -> [ meta, vcf, [] ] } + .set { bcftools_norm_in } + + // Decompose and normalize variants + BCFTOOLS_NORM_MULTISAMPLE ( bcftools_norm_in, ch_fasta ) + ch_versions = ch_versions.mix(BCFTOOLS_NORM_MULTISAMPLE.out.versions) + + emit: + snp_calls_vcf = BCFTOOLS_NORM_SINGLESAMPLE.out.vcf // channel: [ val(meta), path(bcf) ] + combined_bcf = BCFTOOLS_NORM_MULTISAMPLE.out.vcf // channel: [ val(meta), path(bcf) ] + combined_csi = BCFTOOLS_NORM_MULTISAMPLE.out.csi // channel: [ val(meta), path(csi) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/short_variant_calling/tests/main.nf.test b/subworkflows/local/short_variant_calling/tests/main.nf.test new file mode 100644 index 00000000..1d07358f --- /dev/null +++ b/subworkflows/local/short_variant_calling/tests/main.nf.test @@ -0,0 +1,564 @@ +nextflow_workflow { + + name "Test Workflow SHORT_VARIANT_CALLING" + script "../main.nf" + config "./nextflow.config" + workflow "SHORT_VARIANT_CALLING" + + test("1 sample - no bed, fasta, fai, [], []") { + + when { + workflow { + """ + input[0] = Channel.of([ 
+ [ id:'test', single_end:false, num_intervals:1 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ]) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [[],[]] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + } + + test("1 sample - 1 bed, fasta, fai, [], []") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false, num_intervals:1 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ]) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [[],[]] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + + } + + test("1 sample - 1 bed, fasta, fai, bed, []") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false, num_intervals:1 ], // meta map + 
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ]) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'bed'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + + } + + test("1 sample - 2 bed, fasta, fai, bed, []") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + ] + ) + input[1] = 
[ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'bed'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + + } + + test("2 samples - 2 bed, fasta, fai, bed, []") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'bed'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + + } + + test("2 samples - 2 bed, fasta, fai, bed, par_bed") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', 
checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'bed'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[4] = [ + [ id:'par_bed'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + } + + test("1 sample - no bed, fasta, fai, [], [] - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false, num_intervals:1 ], // meta map + 
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ]) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [[],[]] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + + } + + test("1 sample - 1 bed, fasta, fai, [], [] - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false, num_intervals:1 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ]) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [[],[]] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + + } + + test("1 sample - 1 bed, fasta, fai, bed, [] - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false, num_intervals:1 ], // meta map + 
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ]) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'bed'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + + } + + test("1 sample - 2 bed, fasta, fai, bed, [] - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: 
true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'bed'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + + } + + test("2 samples - 2 bed, fasta, fai, bed, [] - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + 
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'bed'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + + } + + test("2 samples - 2 bed, fasta, fai, bed, par_bed - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false, num_intervals:2 ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'bed'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[4] = [ + [ id:'par_bed'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() + } + ) + } + + } + +} diff --git a/subworkflows/local/short_variant_calling/tests/main.nf.test.snap 
b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap new file mode 100644 index 00000000..d35e9387 --- /dev/null +++ b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap @@ -0,0 +1,1294 @@ +{ + "1 sample - 1 bed, fasta, fai, bed, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + ] + ], + "1": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" + ] + ], + "2": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" + ] + ], + "combined_csi": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + 
"timestamp": "2024-08-13T12:18:48.194341252" + }, + "2 samples - 2 bed, fasta, fai, bed, [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + 
"genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "combined_csi": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:38:57.50673848" + }, + "2 samples - 2 bed, fasta, fai, bed, par_bed - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" 
+ ] + ], + "combined_csi": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:39:09.239296314" + }, + "1 sample - 1 bed, fasta, fai, bed, [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + 
"genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "combined_csi": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:20:26.146017974" + }, + "2 samples - 2 bed, fasta, fai, bed, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + ] + ], + "1": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + 
"genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" + ] + ], + "2": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" + ] + ], + "combined_csi": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:37:29.720749753" + }, + "1 sample - 1 bed, fasta, fai, [], []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + ] + ], + "1": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" + ] + ], + "2": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + 
[ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" + ] + ], + "combined_csi": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:18:20.527237885" + }, + "1 sample - 2 bed, fasta, fai, bed, [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + 
"versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "combined_csi": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:38:45.950944715" + }, + "2 samples - 
2 bed, fasta, fai, bed, par_bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + ] + ], + "1": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" + ] + ], + "2": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" + ], + [ + { + "id": "genome.multi_intervals.bed", + 
"contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" + ] + ], + "combined_csi": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:40:41.412678218" + }, + "1 sample - no bed, fasta, fai, [], []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + ] + ], + "1": [ + [ + { + "id": [ + + ], + "contains_affected": false + }, + 
"[]_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" + ] + ], + "2": [ + [ + { + "id": [ + + ], + "contains_affected": false + }, + "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": [ + + ], + "contains_affected": false + }, + "[]_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" + ] + ], + "combined_csi": [ + [ + { + "id": [ + + ], + "contains_affected": false + }, + "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:17:52.991579386" + }, + "1 sample - 2 bed, fasta, fai, bed, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + ] + ], + "1": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + 
"genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" + ] + ], + "2": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" + ] + ], + "combined_csi": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" + ], + [ + { + "id": "genome.multi_intervals.bed", + "contains_affected": false + }, + "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + 
"versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:36:53.97901474" + }, + "1 sample - no bed, fasta, fai, [], [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": [ + + ], + "contains_affected": false + }, + "[]_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": [ + + ], + "contains_affected": false + }, + "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": [ + + ], + "contains_affected": false + }, + "[]_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "combined_csi": [ + [ + { + "id": [ + + ], + "contains_affected": false + }, + "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", 
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:20:07.024378325" + }, + "1 sample - 1 bed, fasta, fai, [], [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", + "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ], + "combined_bcf": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "combined_csi": [ + [ + { + "id": "genome.bed", + "contains_affected": false + }, + "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "snp_calls_vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", + "versions.yml:md5,13101c9283d4a82e859574b0a981311c", + "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", + "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022", + "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d", 
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-13T12:20:16.739088461" + } +} \ No newline at end of file diff --git a/subworkflows/local/short_variant_calling/tests/nextflow.config b/subworkflows/local/short_variant_calling/tests/nextflow.config new file mode 100644 index 00000000..f9b1d1a5 --- /dev/null +++ b/subworkflows/local/short_variant_calling/tests/nextflow.config @@ -0,0 +1,52 @@ +process { + withName: 'DEEPVARIANT' { + ext.prefix = { intervals ? "${meta.id}_${intervals}_deepvariant" : "${meta.id}_deepvariant" } + ext.args = { [ + '--model_type WGS', + "--sample_name=${meta.id}", + '-vcf_stats_report=False' + ].join(' ') } + } + + withName: 'GLNEXUS' { + ext.args = '--config DeepVariant_unfiltered' + } + + withName: 'BCFTOOLS_CONCAT' { + ext.prefix = { "${meta.id}_concat" } + ext.args = [ + '--no-version', + '--allow-overlaps' + ].join(' ') + } + + withName: 'BCFTOOLS_NORM_MULTISAMPLE' { + ext.prefix = { "${meta.id}_norm_multisample" } + ext.args = [ + '--no-version', + '-m -', + '--output-type z', + '--write-index=csi', + '-w 10000' + ].join(' ') + } + + withName: 'BCFTOOLS_NORM_SINGLESAMPLE' { + ext.prefix = { "${meta.id}_norm_singlesample" } + ext.args = [ + '--no-version', + '-m -', + '-w 10000', + '--output-type u', + ].join(' ') + } + + withName: 'BCFTOOLS_FILLTAGS' { + ext.prefix = { "${meta.id}_ac" } + ext.args = [ + '--no-version', + '--output-type u' + ].join(' ') + } + +} diff --git a/subworkflows/local/snv_annotation.nf b/subworkflows/local/snv_annotation.nf deleted file mode 100644 index f65c214f..00000000 --- a/subworkflows/local/snv_annotation.nf +++ /dev/null @@ -1,81 +0,0 @@ -// TODO: BCFTOOLS processes should have unique names so that they are not used multiple times in other workflows? 
-include { ECHTVAR_ANNO } from '../../modules/local/echtvar/anno/main' -include { ECHTVAR_ENCODE } from '../../modules/local/echtvar/encode/main' -include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main' -include { BCFTOOLS_NORM as BCFTOOLS_NORM_SINGLESAMPLE } from '../../modules/nf-core/bcftools/norm/main' -include { BCFTOOLS_INDEX } from '../../modules/nf-core/bcftools/index/main' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_SINGLESAMPLE } from '../../modules/nf-core/bcftools/index/main' -include { BCFTOOLS_FILLTAGS } from '../../modules/local/bcftools/filltags/main' -include { BCFTOOLS_FILLTAGS as BCFTOOLS_FILLTAGS_ANNO } from '../../modules/local/bcftools/filltags/main' -include { ENSEMBLVEP_VEP } from '../../modules/nf-core/ensemblvep/vep/main' -include { TABIX_TABIX as TABIX_VEP } from '../../modules/nf-core/tabix/tabix/main' - -workflow SNV_ANNOTATION { - - take: - ch_bcf - ch_single_sample_vcf - ch_databases - ch_fasta - ch_vep_cache - val_vep_cache_version // string: [mandatory] default: 110 - - main: - ch_versions = Channel.empty() - - // Add allele count tag to mutlisample vcf - BCFTOOLS_FILLTAGS(ch_bcf) - // Index and normalize multisample vcf - BCFTOOLS_INDEX(BCFTOOLS_FILLTAGS.out.vcf) - BCFTOOLS_NORM(BCFTOOLS_FILLTAGS.out.vcf.join(BCFTOOLS_INDEX.out.csi), ch_fasta) - - // Index and normalize single sample vcfs - BCFTOOLS_INDEX_SINGLESAMPLE(ch_single_sample_vcf) - - BCFTOOLS_NORM_SINGLESAMPLE( - ch_single_sample_vcf.join(BCFTOOLS_INDEX_SINGLESAMPLE.out.csi), - ch_fasta - ) - - // Make a cohort database using mutisample vcf - ECHTVAR_ENCODE(BCFTOOLS_NORM.out.vcf) - - // combine input databases with cohort database - db = ch_databases.concat(ECHTVAR_ENCODE.out.db.map{it[1]}).collect() - - // Annotate with chosen databases (GNOMAD,CADD + SAMPLES_DB) - - ECHTVAR_ANNO(BCFTOOLS_NORM_SINGLESAMPLE.out.vcf, db) - BCFTOOLS_FILLTAGS_ANNO(ECHTVAR_ANNO.out.bcf) - - vep_in = BCFTOOLS_FILLTAGS_ANNO.out.vcf.map{ meta, vcf -> return [meta, vcf, []]} - - 
// Annotate with VEP as well - - ENSEMBLVEP_VEP( - vep_in, - "GRCh38", - "homo_sapiens", - val_vep_cache_version, - ch_vep_cache, - ch_fasta, - [] - ) - - TABIX_VEP ( ENSEMBLVEP_VEP.out.vcf ) - - // Get versions - ch_versions = ch_versions.mix(BCFTOOLS_FILLTAGS.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_INDEX_SINGLESAMPLE.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_NORM_SINGLESAMPLE.out.versions) - ch_versions = ch_versions.mix(ECHTVAR_ENCODE.out.versions) - ch_versions = ch_versions.mix(ECHTVAR_ANNO.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_FILLTAGS_ANNO.out.versions) - ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions) - ch_versions = ch_versions.mix(TABIX_VEP.out.versions) - - emit: - versions = ch_versions -} diff --git a/subworkflows/local/snv_annotation/main.nf b/subworkflows/local/snv_annotation/main.nf new file mode 100644 index 00000000..1bcc22e7 --- /dev/null +++ b/subworkflows/local/snv_annotation/main.nf @@ -0,0 +1,74 @@ +include { ANNOTATE_CADD } from '../annotate_cadd/main' +include { ECHTVAR_ANNO } from '../../../modules/local/echtvar/anno/main' +include { BCFTOOLS_FILLTAGS as BCFTOOLS_FILLTAGS_ANNO } from '../../../modules/local/bcftools/filltags/main' +include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main' +include { TABIX_TABIX as TABIX_VEP } from '../../../modules/nf-core/tabix/tabix/main' + +workflow SNV_ANNOTATION { + + take: + ch_vcf // channel [mandatory] [ val(meta), path(vcf) ] + ch_databases // channel: [mandatory] [ val(meta), path(db) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_vep_cache // channel: [mandatory] [ path(cache) ] + val_vep_cache_version // string: [mandatory] default: 110 + ch_vep_extra_files // channel: [mandatory] [ path(files) ] + val_annotate_cadd // 
bool: [mandatory] + ch_cadd_header // channel: [mandatory] [ path(txt) ] + ch_cadd_resources // channel: [mandatory] [ path(annotation) ] + ch_cadd_prescored // channel: [mandatory] [ path(prescored) ] + + main: + ch_versions = Channel.empty() + ch_vep_in = Channel.empty() + + // Annotate with chosen databases (GNOMAD,CADD + SAMPLES_DB) + ECHTVAR_ANNO ( ch_vcf, ch_databases ) + ch_versions = ch_versions.mix(ECHTVAR_ANNO.out.versions) + + BCFTOOLS_FILLTAGS_ANNO(ECHTVAR_ANNO.out.bcf) + ch_versions = ch_versions.mix(BCFTOOLS_FILLTAGS_ANNO.out.versions) + + // Annotating with CADD + if (val_annotate_cadd) { + ANNOTATE_CADD ( + ch_fai, + BCFTOOLS_FILLTAGS_ANNO.out.vcf, + BCFTOOLS_FILLTAGS_ANNO.out.tbi, + ch_cadd_header, + ch_cadd_resources, + ch_cadd_prescored + ) + ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) + + ANNOTATE_CADD.out.vcf + .map { meta, vcf -> [ meta, vcf, [] ] } + .set { ch_vep_in } + + } else { + BCFTOOLS_FILLTAGS_ANNO.out.vcf + .map { meta, vcf -> [ meta, vcf, [] ] } + .set { ch_vep_in } + + } + + ENSEMBLVEP_VEP ( + ch_vep_in, + "GRCh38", + "homo_sapiens", + val_vep_cache_version, + ch_vep_cache, + ch_fasta, + ch_vep_extra_files + ) + ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions) + + TABIX_VEP ( ENSEMBLVEP_VEP.out.vcf ) + ch_versions = ch_versions.mix(TABIX_VEP.out.versions) + + emit: + vcf = ENSEMBLVEP_VEP.out.vcf + tbi = TABIX_VEP.out.tbi + versions = ch_versions +} diff --git a/subworkflows/local/snv_annotation/tests/main.nf.test b/subworkflows/local/snv_annotation/tests/main.nf.test new file mode 100644 index 00000000..6009d4c2 --- /dev/null +++ b/subworkflows/local/snv_annotation/tests/main.nf.test @@ -0,0 +1,160 @@ +nextflow_workflow { + + name "Test Workflow SNV_ANNOTATION" + script "../" + workflow "SNV_ANNOTATION" + config "./nextflow.config" + tag "SNV_ANNOTATION" + + setup { + run("GUNZIP") { + script "../../../../modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = [ + [ id:'hg38' ], + 
file(params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz', checkIfExists: true) + ] + """ + } + } + run("SAMTOOLS_FAIDX") { + script "../../../../modules/nf-core/samtools/faidx/main.nf" + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = [[],[]] + """ + } + } + run("MINIMAP2_ALIGN") { + script "../../../../modules/nf-core/minimap2/align/main.nf" + process { + """ + input[0] = [ + [ id: 'test', num_intervals:1 ], + file(params.pipelines_testdata_base_path + 'nallo/testdata/HG002_PacBio_Revio.fastq.gz', checkIfExists: true) + ] + input[1] = GUNZIP.out.gunzip + input[2] = true + input[3] = 'csi' + input[4] = false + input[5] = false + """ + } + } + run("SHORT_VARIANT_CALLING") { + script "../../short_variant_calling/main.nf" + process { + """ + input[0] = MINIMAP2_ALIGN.out.bam + .join(MINIMAP2_ALIGN.out.index) + .join(Channel.of([ + [ id: 'test', num_intervals:1 ], + file(params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed', checkIfExists: true) + ])) + input[1] = GUNZIP.out.gunzip + input[2] = SAMTOOLS_FAIDX.out.fai + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + run("UNTAR") { + script "../../../../modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ id: 'vep_cache' ], + file(params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz', checkIfExists:true) + ] + """ + } + } + + } + + test("bcf, db, vep_cache, '110'") { + + when { + workflow { + """ + input[0] = SHORT_VARIANT_CALLING.out.combined_bcf + input[1] = [ + file(params.pipelines_testdata_base_path + 'nallo/reference/cadd.v1.6.hg38.test_data.zip', checkIfExists: true) + ] + input[2] = GUNZIP.out.gunzip + input[3] = SAMTOOLS_FAIDX.out.fai + input[4] = UNTAR.out.untar.map { meta, cache -> cache } + input[5] = Channel.value('110') + input[6] = [ + file(params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv', checkIfExists: true) + ] + input[7] = false + input[8] = Channel.value([]) +
input[9] = null + input[10] = null + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + file(workflow.out.tbi.get(0).get(1)).name, + path(workflow.out.vcf.get(0).get(1)).linesGzip[48..118], // Stable + path(workflow.out.vcf.get(0).get(1)).linesGzip[121..150] // Stable + ).match() } + ) + } + + } + + test("bcf, db, vep_cache, '110', -stub") { + + options "-stub" + + when { + params { + } + workflow { + """ + input[0] = SHORT_VARIANT_CALLING.out.combined_bcf + input[1] = [ + file(params.pipelines_testdata_base_path + 'nallo/reference/cadd.v1.6.hg38.test_data.zip', checkIfExists: true) + ] + input[2] = GUNZIP.out.gunzip + input[3] = SAMTOOLS_FAIDX.out.fai + input[4] = UNTAR.out.untar.map { meta, cache -> cache } + input[5] = Channel.value('110') + input[6] = [ + file(params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv', checkIfExists: true) + ] + input[7] = false + input[8] = Channel.value([]) + input[9] = null + input[10] = null + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.tbi, + workflow.out.vcf + ).match() } + ) + } + + } + +} diff --git a/subworkflows/local/snv_annotation/tests/main.nf.test.snap b/subworkflows/local/snv_annotation/tests/main.nf.test.snap new file mode 100644 index 00000000..f491df0c --- /dev/null +++ b/subworkflows/local/snv_annotation/tests/main.nf.test.snap @@ -0,0 +1,156 @@ +{ + "bcf, db, vep_cache, '110', -stub": { + "content": [ + [ + "versions.yml:md5,503122d4650f6a8a39b4e810063d6c22", + "versions.yml:md5,992301857689684643c42695c032a7f2", + "versions.yml:md5,a07924ee4ebc2d4de5bb7ef897ddc30c", + "versions.yml:md5,c0e55e36a31ed71acf25702b7d059533" + ], + [ + [ + { + "id": "test_data.bed", + "contains_affected": false + }, + "test_data.bed.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test_data.bed", + "contains_affected": false + }, + 
"test_data.bed.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T10:11:15.912554747" + }, + "bcf, db, vep_cache, '110'": { + "content": [ + [ + "versions.yml:md5,503122d4650f6a8a39b4e810063d6c22", + "versions.yml:md5,992301857689684643c42695c032a7f2", + "versions.yml:md5,a07924ee4ebc2d4de5bb7ef897ddc30c", + "versions.yml:md5,c0e55e36a31ed71acf25702b7d059533" + ], + "test_data.bed.vcf.gz.tbi", + [ + "##INFO=", + "##VEP-command-line='vep --assembly GRCh38 --cache --cache_version 110 --compress_output bgzip --database 0 --dir_cache [PATH]/vep_cache --fasta hg38.test.fa --fork 2 --input_file test_data.bed_filltags_anno.vcf.gz --offline --output_file test_data.bed.vcf.gz --vcf'", + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\ttest", + "chr16\t63972\tchr16_63972_T_C\tT\tC\t20\t.\tAF=1;AQ=20;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000262316|protein_coding||3/17||||||||||-1||HGNC|HGNC:20561,C|intron_variant&NMD_transcript_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000417043|nonsense_mediated_decay||2/3||||||||||-1||HGNC|HGNC:20561,C|intron_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000419764|protein_coding||3/3||||||||||-1|cds_end_NF|HGNC|HGNC:20561,C|intron_variant&NMD_transcript_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000428730|nonsense_mediated_decay||2/16||||||||||-1||HGNC|HGNC:20561,C|upstream_gene_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000448893|protein_coding|||||||||||4210|-1|cds_start_NF|HGNC|HGNC:20561,C|intron_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000450643|protein_coding||3/4||||||||||-1|cds_end_NF|HGNC|HGNC:20561,C|downstream_gene_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000472390|retained_intron|||||||||||580|-1||HGNC|HGNC:20561,C|upstream_gene_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|EN
ST00000482904|retained_intron|||||||||||1766|-1||HGNC|HGNC:20561,C|upstream_gene_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000486045|retained_intron|||||||||||3779|-1||HGNC|HGNC:20561,C|non_coding_transcript_exon_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000487201|retained_intron|2/3||||1177|||||||-1||HGNC|HGNC:20561,C|upstream_gene_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000493647|retained_intron|||||||||||3292|-1||HGNC|HGNC:20561\tGT:DP:AD:GQ:PL:RNC\t1/1:13:0,13:11:20,11,0:..", + "chr16\t86889\tchr16_86889_T_C\tT\tC\t21\t.\tAF=1;AQ=21;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|downstream_gene_variant|MODIFIER|MPG|ENSG00000103152|Transcript|ENST00000219431|protein_coding|||||||||||1047|1||HGNC|HGNC:7211,C|downstream_gene_variant|MODIFIER|MPG|ENSG00000103152|Transcript|ENST00000356432|protein_coding|||||||||||1043|1||HGNC|HGNC:7211,C|downstream_gene_variant|MODIFIER|MPG|ENSG00000103152|Transcript|ENST00000397817|protein_coding|||||||||||1038|1||HGNC|HGNC:7211,C|intron_variant|MODIFIER|NPRL3|ENSG00000103148|Transcript|ENST00000399953|protein_coding||11/11||||||||||-1||HGNC|HGNC:14124,C|downstream_gene_variant|MODIFIER|MPG|ENSG00000103152|Transcript|ENST00000436333|protein_coding|||||||||||1205|1|cds_end_NF|HGNC|HGNC:7211,C|upstream_gene_variant|MODIFIER||ENSG00000269482|Transcript|ENST00000601483|lncRNA|||||||||||3742|1|||,C|intron_variant&non_coding_transcript_variant|MODIFIER|NPRL3|ENSG00000103148|Transcript|ENST00000610509|retained_intron||1/2||||||||||-1||HGNC|HGNC:14124,C|intron_variant|MODIFIER|NPRL3|ENSG00000103148|Transcript|ENST00000611875|protein_coding||13/13||||||||||-1||HGNC|HGNC:14124,C|intron_variant&NMD_transcript_variant|MODIFIER|NPRL3|ENSG00000103148|Transcript|ENST00000621703|nonsense_mediated_decay||10/10||||||||||-1||HGNC|HGNC:14124,C|intron_variant&NMD_transcript_variant|MODIFIER|NPRL3|ENSG00000103148|Transcript|ENST00000622194|nonsense_mediated_decay||11/11||||||||||-1||HGNC|HGNC:14124\tGT:DP:AD:GQ:PL:RN
C\t1/1:28:0,28:18:21,20,0:..", + "chr16\t160055\tchr16_160055_C_G\tC\tG\t18\t.\tAF=1;AQ=18;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3067|1||HGNC|HGNC:4836,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4427|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:30:0,29:17:18,22,0:..", + "chr16\t160070\tchr16_160070_C_CGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGACACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGACGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGAGATGCCCAGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGAT\tC\tCGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGACACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGACGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGAGATGCCCAGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGAT\t0\t.\tAF=0.5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGACACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGC
ACAGGTTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGACGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGAGATGCCCAGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGAT|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3051|1||HGNC|HGNC:4836,GGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGACACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGACGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGAGATGCCCAGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGAT|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGACACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGACGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGAGATGCCCAGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGAT|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST000
00496585|protein_coding_CDS_not_defined|||||||||||4411|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:30:8,7:2:9,0,18:..", + "chr16\t160089\tchr16_160089_TG_T\tTG\tT\t14\t.\tAF=0.5;AQ=14;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3032|1||HGNC|HGNC:4836,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4392|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:30:25,5:15:14,0,29:..", + "chr16\t160180\tchr16_160180_C_T\tC\tT\t15\t.\tAF=1;AQ=15;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2942|1||HGNC|HGNC:4836,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4302|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:29:1,28:15:15,25,0:..", + "chr16\t160216\tchr16_160216_T_A\tT\tA\t10\t.\tAF=1;AQ=10;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2906|1||HGNC|HGNC:4836,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4266|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:30:0,27:6:10,6,0:..", + 
"chr16\t160217\tchr16_160217_C_T\tC\tT\t9\t.\tAF=1;AQ=9;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2905|1||HGNC|HGNC:4836,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4265|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:27:0,27:5:9,3,0:..", + "chr16\t160326\tchr16_160326_T_A\tT\tA\t15\t.\tAF=1;AQ=15;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2796|1||HGNC|HGNC:4836,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4156|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:30:0,29:14:15,18,0:..", + "chr16\t160327\tchr16_160327_C_T\tC\tT\t16\t.\tAF=1;AQ=16;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2795|1||HGNC|HGNC:4836,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4155|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:29:3,26:14:16,17,0:..", + 
"chr16\t160388\tchr16_160388_G_GGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGTTGTGAGA\tG\tGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGTTGTGAGA\t15\t.\tAF=0.5;AQ=15;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGTTGTGAGA|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2733|1||HGNC|HGNC:4836,GGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGTTGTGAGA|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGA
GGTGCCTGGGACGGCTTGTGGGGCACAGGTTGTGAGA|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4093|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:31:4,14:11:15,0,12:..", + "chr16\t160728\tchr16_160728_C_T\tC\tT\t5\t.\tAF=1;AQ=5;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2394|1||HGNC|HGNC:4836,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3754|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:30:8,22:4:5,3,0:..", + "chr16\t160746\tchr16_160746_T_C\tT\tC\t10\t.\tAF=0.5;AQ=10;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2376|1||HGNC|HGNC:4836,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3736|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:30:23,7:10:10,0,18:..", + 
"chr16\t160751\tchr16_160751_AG_A;chr16_160752_G_GGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA\tAG\tA\t8\t.\tAF=0.5;AQ=8;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2370|1||HGNC|HGNC:4836,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3730|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/0:22:.,7:6:0,0,0:..", + "chr16\t160752\tchr16_160751_AG_A;chr16_160752_G_GGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA\tG\tGGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA\t8\t.\tAF=0.5;AQ=6;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2369|1||HGNC|HGNC:4836,GGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|
||||||||||3729|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:22:.,14:6:0,0,0:..", + "chr16\t161210\tchr16_161210_GC_G\tGC\tG\t6\t.\tAF=0.5;AQ=6;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||1911|1||HGNC|HGNC:4836,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4767|1||HGNC|HGNC:4826,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3271|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:27:21,5:7:6,0,27:..", + "chr16\t161474\tchr16_161474_TA_T\tTA\tT\t22\t.\tAF=0.5;AQ=22;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||1647|1||HGNC|HGNC:4836,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4503|1||HGNC|HGNC:4826,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3007|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:25:16,9:23:22,0,39:..", + 
"chr16\t162329\tchr16_162329_G_A\tG\tA\t23\t.\tAF=0.5;AQ=23;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||793|1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||3649|1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||2153|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:23:15,8:24:23,0,41:..", + "chr16\t162650\tchr16_162650_C_T\tC\tT\t34\t.\tAF=0.5;AQ=34;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||472|1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||3328|1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1832|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:24:9,15:33:34,0,41:..", + 
"chr16\t163106\tchr16_163106_C_T\tC\tT\t22\t.\tAF=0.5;AQ=22;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||16|1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2872|1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1376|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:23:15,8:22:22,0,39:..", + "chr16\t163140\tchr16_163140_T_G\tT\tG\t17\t.\tAF=0.5;AQ=17;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|non_coding_transcript_exon_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|1/3||||19|||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2838|1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1342|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:23:15,8:17:17,0,39:..", + 
"chr16\t163143\tchr16_163143_G_A\tG\tA\t16\t.\tAF=0.5;AQ=16;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|non_coding_transcript_exon_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|1/3||||22|||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2835|1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1339|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:23:15,8:17:16,0,41:..", + "chr16\t163229\tchr16_163229_T_C\tT\tC\t0\t.\tAF=0.5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||1/2||||||||||1||HGNC|HGNC:4836,C|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2749|1||HGNC|HGNC:4826,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1253|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:23:15,8:3:0,0,27:..", + 
"chr16\t163796\tchr16_163796_T_A\tT\tA\t12\t.\tAF=1;AQ=12;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||1/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2182|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4883|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||686|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:15:0,15:7:12,7,0:..", + "chr16\t163800\tchr16_163800_G_A\tG\tA\t8\t.\tAF=1;AQ=8;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||1/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2178|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4879|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||682|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:16:2,14:3:8,0,0:..", + 
"chr16\t163804\tchr16_163804_G_A\tG\tA\t14\t.\tAF=1;AQ=14;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||1/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2174|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4875|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||678|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:21:0,21:13:14,16,0:..", + "chr16\t164431\tchr16_164431_T_TG\tT\tTG\t25\t.\tAF=1;AQ=25;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||1/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1546|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4247|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||50|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:21:0,21:22:25,25,0:..", + 
"chr16\t164541\tchr16_164541_G_GGTCC\tG\tGGTCC\t26\t.\tAF=0.5;AQ=26;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GTCC|non_coding_transcript_exon_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|2/3||||155-156|||||||1||HGNC|HGNC:4836,GTCC|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1436|1||HGNC|HGNC:4826,GTCC|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4137|1||HGNC|HGNC:4825,GTCC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GTCC|non_coding_transcript_exon_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|1/3||||60-61|||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:13,7:26:26,0,39:..", + "chr16\t164690\tchr16_164690_C_CGCGGGGCGCGGTGCGGGCGGG\tC\tCGCGGGGCGCGGTGCGGGCGGG\t27\t.\tAF=0.5;AQ=27;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GCGGGGCGCGGTGCGGGCGGG|splice_donor_region_variant&intron_variant&non_coding_transcript_variant|LOW|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,GCGGGGCGCGGTGCGGGCGGG|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1287|1||HGNC|HGNC:4826,GCGGGGCGCGGTGCGGGCGGG|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3988|1||HGNC|HGNC:4825,GCGGGGCGCGGTGCGGGCGGG|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GCGGGGCGCGGTGCGGGCGGG|splice_donor_region_variant&intron_variant&non_coding_transcript_variant|LOW|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:6,14:25:27,0
,28:..", + "chr16\t164723\tchr16_164723_C_T\tC\tT\t11\t.\tAF=0.5;AQ=11;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=T|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1255|1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3956|1||HGNC|HGNC:4825,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:15,4:11:11,0,21:..", + "chr16\t164731\tchr16_164731_C_CG\tC\tCG\t0\t.\tAF=0.5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1246|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3947|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:2,14:2:14,0,22:..", + 
"chr16\t164740\tchr16_164740_C_T\tC\tT\t2\t.\tAF=0.5;AQ=2;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=T|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1238|1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3939|1||HGNC|HGNC:4825,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:20:1,18:2:2,0,1:..", + "chr16\t164771\tchr16_164771_T_C\tT\tC\t2\t.\tAF=0.5;AQ=2;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,C|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1207|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3908|1||HGNC|HGNC:4825,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:13,6:4:10,0,25:..", + 
"chr16\t164791\tchr16_164791_G_A\tG\tA\t16\t.\tAF=0.5;AQ=16;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1187|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3888|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:6,15:15:16,0,20:..", + "chr16\t164800\tchr16_164800_G_GGCGGGGTCGCGGGGCGGGGCGAGGTC\tG\tGGCGGGGTCGCGGGGCGGGGCGAGGTC\t4\t.\tAF=0.5;AQ=4;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GCGGGGTCGCGGGGCGGGGCGAGGTC|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,GCGGGGTCGCGGGGCGGGGCGAGGTC|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1177|1||HGNC|HGNC:4826,GCGGGGTCGCGGGGCGGGGCGAGGTC|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3878|1||HGNC|HGNC:4825,GCGGGGTCGCGGGGCGGGGCGAGGTC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GCGGGGTCGCGGGGCGGGGCGAGGTC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:14,4:5:4,0,19:..", + 
"chr16\t164817\tchr16_164817_C_A\tC\tA\t22\t.\tAF=0.5;AQ=22;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1161|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3862|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:8,13:21:22,0,27:..", + "chr16\t164831\tchr16_164831_C_CG\tC\tCG\t4\t.\tAF=1;AQ=4;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1146|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3847|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:21:4,15:2:4,0,0:..", + 
"chr16\t164839\tchr16_164839_G_GGT\tG\tGGT\t15\t.\tAF=1;AQ=15;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=GT|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,GT|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1138|1||HGNC|HGNC:4826,GT|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3839|1||HGNC|HGNC:4825,GT|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GT|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:21:2,15:10:15,10,0:..", + "chr16\t164871\tchr16_164871_T_G\tT\tG\t13\t.\tAF=0.5;AQ=13;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1107|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3808|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:20:12,7:14:13,0,33:..", + 
"chr16\t164884\tchr16_164884_G_GT\tG\tGT\t15\t.\tAF=1;AQ=15;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1093|1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3794|1||HGNC|HGNC:4825,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:21:0,21:15:15,22,0:..", + "chr16\t164892\tchr16_164892_CG_C\tCG\tC\t2\t.\tAF=0.5;AQ=2;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1085|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3786|1||HGNC|HGNC:4825,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:18,3:5:2,0,21:..", + 
"chr16\t164907\tchr16_164907_C_A\tC\tA\t3\t.\tAF=0.5;AQ=3;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1071|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3772|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:20:12,8:5:3,0,20:..", + "chr16\t164931\tchr16_164931_C_G\tC\tG\t3\t.\tAF=0.5;AQ=3;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1047|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3748|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:9:1,8:5:3,0,19:..", + 
"chr16\t164932\tchr16_164932_G_GGGCGGGC\tG\tGGGCGGGC\t1\t.\tAF=0.5;AQ=1;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GGCGGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,GGCGGGC|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1045|1||HGNC|HGNC:4826,GGCGGGC|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3746|1||HGNC|HGNC:4825,GGCGGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GGCGGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:9:2,7:4:1,0,15:..", + "chr16\t164942\tchr16_164942_G_GT\tG\tGT\t4\t.\tAF=0.5;AQ=4;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=T|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1035|1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3736|1||HGNC|HGNC:4825,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:9:0,8:6:4,0,14:..", + 
"chr16\t164953\tchr16_164953_G_GGCGGC\tG\tGGCGGC\t13\t.\tAF=0.5;AQ=13;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GCGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,GCGGC|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1024|1||HGNC|HGNC:4826,GCGGC|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3725|1||HGNC|HGNC:4825,GCGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GCGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:14,6:13:13,0,33:..", + "chr16\t164985\tchr16_164985_C_CG\tC\tCG\t18\t.\tAF=1;AQ=18;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||992|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3693|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:19:1,18:16:18,18,0:..", + 
"chr16\t165212\tchr16_165212_TC_T\tTC\tT\t3\t.\tAF=0.5;AQ=3;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||57|1||HGNC|HGNC:4836,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||765|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3466|1||HGNC|HGNC:4825,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:18,3:5:3,0,30:..", + "chr16\t165380\tchr16_165380_G_A\tG\tA\t29\t.\tAF=0.5;AQ=29;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||224|1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||598|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3299|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:22:14,8:29:29,0,43:..", + 
"chr16\t165856\tchr16_165856_G_C\tG\tC\t29\t.\tAF=0.5;AQ=29;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||700|1||HGNC|HGNC:4836,C|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||122|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||2823|1||HGNC|HGNC:4825,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:22:14,8:29:29,0,44:..", + "chr16\t166191\tchr16_166191_C_CG\tC\tCG\t22\t.\tAF=0.5;AQ=22;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||1035|1||HGNC|HGNC:4836,G|intron_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding||1/2||||||||||1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||2487|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:22:14,6:23:22,0,36:..", + 
"chr16\t166518\tchr16_166518_TG_T\tTG\tT\t11\t.\tAF=0.5;AQ=11;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||1363|1||HGNC|HGNC:4836,-|intron_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding||2/2||||||||||1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||2160|1||HGNC|HGNC:4825,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||2/2||||||||||1||HGNC|HGNC:4826,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||2/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:16,4:12:11,0,32:..", + "chr16\t167928\tchr16_167928_GT_G\tGT\tG\t17\t.\tAF=0.5;AQ=17;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||4947|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2773|1||HGNC|HGNC:4836,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1165|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||4961|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||750|1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||1168|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||4965|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|pr
otein_coding_CDS_not_defined|||||||||||1168|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:14,6:17:17,0,31:..", + "chr16\t167947\tchr16_167947_T_TC\tT\tTC\t19\t.\tAF=0.5;AQ=19;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||4928|1||HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2791|1||HGNC|HGNC:4836,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1183|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||4942|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||731|1||HGNC|HGNC:4825,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||1186|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||4946|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||4996|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1186|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:11,10:18:19,0,22:..", + 
"chr16\t167948\tchr16_167948_T_C\tT\tC\t10\t.\tAF=0.5;AQ=10;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||4928|1||HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2792|1||HGNC|HGNC:4836,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1184|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||4942|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||731|1||HGNC|HGNC:4825,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||1187|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||4946|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||4996|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1187|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:20:10,10:10:10,0,17:..", + 
"chr16\t168533\tchr16_168533_T_C\tT\tC\t29\t.\tAF=0.5;AQ=29;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||4343|1||HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3377|1||HGNC|HGNC:4836,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1769|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||4357|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||146|1||HGNC|HGNC:4825,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||1772|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||4361|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||4411|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1772|1||HGNC|HGNC:4826,C|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||4969|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:11,9:29:29,0,43:..", + 
"chr16\t168736\tchr16_168736_A_G\tA\tG\t34\t.\tAF=0.5;AQ=34;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||4140|1||HGNC|HGNC:4824,G|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3580|1||HGNC|HGNC:4836,G|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1972|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||4154|1||HGNC|HGNC:4824,G|non_coding_transcript_exon_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|1/3||||58|||||||1||HGNC|HGNC:4825,G|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||1975|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||4158|1||HGNC|HGNC:4824,G|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||4208|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,G|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1975|1||HGNC|HGNC:4826,G|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||4766|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:11,9:34:34,0,48:..", + 
"chr16\t169117\tchr16_169117_TG_T\tTG\tT\t4\t.\tAF=0.5;AQ=4;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3758|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3962|1||HGNC|HGNC:4836,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2354|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3772|1||HGNC|HGNC:4824,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||2357|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3776|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||3826|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||2357|1||HGNC|HGNC:4826,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||4384|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:17,3:6:4,0,27:..", + 
"chr16\t169404\tchr16_169404_TC_T\tTC\tT\t5\t.\tAF=0.5;AQ=5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3471|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||4249|1||HGNC|HGNC:4836,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2641|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3485|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||71|1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||2644|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3489|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||3539|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||2644|1||HGNC|HGNC:4826,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||4097|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:16,2:7:5,0,30:..", + 
"chr16\t169624\tchr16_169624_TC_T\tTC\tT\t10\t.\tAF=0.5;AQ=10;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3251|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||4469|1||HGNC|HGNC:4836,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2861|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3265|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||291|1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||2864|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3269|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||3319|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||2864|1||HGNC|HGNC:4826,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||3877|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:16,4:11:10,0,32:..", + 
"chr16\t170831\tchr16_170831_T_TCACA\tT\tTCACA\t0\t.\tAF=0.5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=CACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||2044|1||HGNC|HGNC:4824,CACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4067|1||HGNC|HGNC:4826,CACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||2058|1||HGNC|HGNC:4824,CACA|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1497|1||HGNC|HGNC:4825,CACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4070|1||HGNC|HGNC:4826,CACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||2062|1||HGNC|HGNC:4824,CACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||2112|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,CACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4070|1||HGNC|HGNC:4826,CACA|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2670|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:6,6:3:19,0,25:..", + 
"chr16\t170862\tchr16_170862_C_CACACA\tC\tCACACA\t5\t.\tAF=0.5;AQ=5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=ACACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||2013|1||HGNC|HGNC:4824,ACACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4098|1||HGNC|HGNC:4826,ACACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||2027|1||HGNC|HGNC:4824,ACACA|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1528|1||HGNC|HGNC:4825,ACACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4101|1||HGNC|HGNC:4826,ACACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||2031|1||HGNC|HGNC:4824,ACACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||2081|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,ACACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4101|1||HGNC|HGNC:4826,ACACA|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2639|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:16,2:7:5,0,28:..", + 
"chr16\t170943\tchr16_170943_AG_A\tAG\tA\t1\t.\tAF=0.5;AQ=1;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||1932|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4180|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1946|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1610|1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4183|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1950|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||2000|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4183|1||HGNC|HGNC:4826,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2558|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:18,2:4:1,0,26:..", + 
"chr16\t171206\tchr16_171206_T_TTTA\tT\tTTTA\t13\t.\tAF=0.5;AQ=13;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=TTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||1669|1||HGNC|HGNC:4824,TTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4442|1||HGNC|HGNC:4826,TTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1683|1||HGNC|HGNC:4824,TTA|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1872|1||HGNC|HGNC:4825,TTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4445|1||HGNC|HGNC:4826,TTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1687|1||HGNC|HGNC:4824,TTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||1737|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,TTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4445|1||HGNC|HGNC:4826,TTA|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2295|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:11,9:13:13,0,22:..", + 
"chr16\t171219\tchr16_171219_T_TTA\tT\tTTA\t1\t.\tAF=0.5;AQ=1;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=TA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||1656|1||HGNC|HGNC:4824,TA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4455|1||HGNC|HGNC:4826,TA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1670|1||HGNC|HGNC:4824,TA|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1885|1||HGNC|HGNC:4825,TA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4458|1||HGNC|HGNC:4826,TA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1674|1||HGNC|HGNC:4824,TA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||1724|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,TA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4458|1||HGNC|HGNC:4826,TA|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2282|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:14,3:4:1,0,20:..", + 
"chr16\t171220\tchr16_171220_T_TATTA\tT\tTATTA\t9\t.\tAF=0.5;AQ=9;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=ATTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||1655|1||HGNC|HGNC:4824,ATTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4456|1||HGNC|HGNC:4826,ATTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1669|1||HGNC|HGNC:4824,ATTA|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1886|1||HGNC|HGNC:4825,ATTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4459|1||HGNC|HGNC:4826,ATTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1673|1||HGNC|HGNC:4824,ATTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||1723|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,ATTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4459|1||HGNC|HGNC:4826,ATTA|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2281|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:16,4:10:9,0,23:..", + 
"chr16\t171534\tchr16_171534_AT_A\tAT\tA\t20\t.\tAF=0.5;AQ=20;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||1341|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4771|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1355|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||2201|1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4774|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1359|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||1409|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4774|1||HGNC|HGNC:4826,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||1967|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:14,5:20:20,0,40:..", + 
"chr16\t171881\tchr16_171881_TC_T\tTC\tT\t21\t.\tAF=0.5;AQ=21;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||994|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||4798|1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||4822|1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1008|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||2548|1||HGNC|HGNC:4825,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||4816|1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1012|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||1062|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||4866|1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||1620|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:12,7:21:21,0,35:.." 
+ ], + [ + "chr16\t172418\tchr16_172418_T_C\tT\tC\t20\t.\tAF=1;AQ=20;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||458|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||4262|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||4286|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||472|1||HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3084|1||HGNC|HGNC:4825,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||4280|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||476|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||526|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||4330|1||HGNC|HGNC:4823,C|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||4895|-1|||,C|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||1084|-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:18:0,18:16:20,17,0:..", + 
"chr16\t172636\tchr16_172636_C_T\tC\tT\t23\t.\tAF=1;AQ=23;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||240|1||HGNC|HGNC:4824,T|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||4044|1||HGNC|HGNC:4823,T|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||4068|1||HGNC|HGNC:4823,T|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||254|1||HGNC|HGNC:4824,T|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3302|1||HGNC|HGNC:4825,T|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||4062|1||HGNC|HGNC:4823,T|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||258|1||HGNC|HGNC:4824,T|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||308|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,T|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||4112|1||HGNC|HGNC:4823,T|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||4677|-1|||,T|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||866|-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:18:1,17:14:23,14,0:..", + 
"chr16\t172870\tchr16_172870_G_C\tG\tC\t24\t.\tAF=1;AQ=24;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||6|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||3810|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||3834|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||20|1||HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3536|1||HGNC|HGNC:4825,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||3828|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||24|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||74|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||3878|1||HGNC|HGNC:4823,C|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||4443|-1|||,C|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||632|-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:18:0,18:20:24,22,0:..", + 
"chr16\t173116\tchr16_173116_TC_T\tTC\tT\t6\t.\tAF=0.5;AQ=6;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant|LOW|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding||1/2||||||||||1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||3563|1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||3587|1||HGNC|HGNC:4823,-|splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant|LOW|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding||1/2||||||||||1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3783|1||HGNC|HGNC:4825,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||3581|1||HGNC|HGNC:4823,-|non_coding_transcript_exon_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|1/2||||224|||||||1||HGNC|HGNC:4824,-|splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant|LOW|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding||1/1||||||||||1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||3631|1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||4196|-1|||,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||385|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:16,3:8:6,0,30:..", + 
"chr16\t173707\tchr16_173707_A_G\tA\tG\t27\t.\tAF=1;AQ=27;AC=2;cadd_raw=0.19;cadd_phred=3.05;CSQ=G|3_prime_UTR_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|3/3||||573|||||||1||HGNC|HGNC:4824,G|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||2973|1||HGNC|HGNC:4823,G|upstream_gene_variant|MODIFIER|Y_RNA|ENSG00000207243|Transcript|ENST00000384514|misc_RNA|||||||||||4847|1||RFAM|,G|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||2997|1||HGNC|HGNC:4823,G|3_prime_UTR_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|3/3||||510|||||||1||HGNC|HGNC:4824,G|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4373|1||HGNC|HGNC:4825,G|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||2991|1||HGNC|HGNC:4823,G|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||32|1||HGNC|HGNC:4824,G|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||244|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,G|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||3041|1||HGNC|HGNC:4823,G|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||3606|-1|||,G|upstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||47|-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:19:0,19:25:27,27,0:..", + 
"chr16\t176848\tchr16_176848_GC_G\tGC\tG\t5\t.\tAF=0.5;AQ=5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBQ1|ENSG00000086506|Transcript|ENST00000199708|protein_coding|||||||||||3610|1||HGNC|HGNC:4833,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3139|1||HGNC|HGNC:4824,-|intron_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding||1/2||||||||||1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|Y_RNA|ENSG00000207243|Transcript|ENST00000384514|misc_RNA|||||||||||1705|1||RFAM|,-|intron_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding||1/2||||||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3139|1||HGNC|HGNC:4824,-|non_coding_transcript_exon_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|1/2||||152|||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3174|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||3386|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron||1/1||||||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||464|-1|||,-|upstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||3189|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:17:15,2:6:5,0,28:..", + 
"chr16\t176953\tchr16_176953_CA_C\tCA\tC\t5\t.\tAF=0.5;AQ=5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBQ1|ENSG00000086506|Transcript|ENST00000199708|protein_coding|||||||||||3505|1||HGNC|HGNC:4833,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3244|1||HGNC|HGNC:4824,-|frameshift_variant|HIGH|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|2/3||||158|121|41|K/X|Aag/ag|||1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|Y_RNA|ENSG00000207243|Transcript|ENST00000384514|misc_RNA|||||||||||1600|1||RFAM|,-|frameshift_variant|HIGH|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|2/3||||85|25|9|K/X|Aag/ag|||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3244|1||HGNC|HGNC:4824,-|non_coding_transcript_exon_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|1/2||||257|||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3279|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||3491|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|non_coding_transcript_exon_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|2/2||||90|||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||359|-1|||,-|upstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||3294|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:17:15,2:7:5,0,30:..", + 
"chr16\t177485\tchr16_177485_AC_A\tAC\tA\t5\t.\tAF=0.5;AQ=5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBQ1|ENSG00000086506|Transcript|ENST00000199708|protein_coding|||||||||||2973|1||HGNC|HGNC:4833,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3776|1||HGNC|HGNC:4824,-|3_prime_UTR_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|3/3||||541|||||||1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|Y_RNA|ENSG00000207243|Transcript|ENST00000384514|misc_RNA|||||||||||1068|1||RFAM|,-|3_prime_UTR_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|3/3||||468|||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3776|1||HGNC|HGNC:4824,-|non_coding_transcript_exon_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|2/2||||640|||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3811|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||4023|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||212|1||HGNC|HGNC:4823,-|non_coding_transcript_exon_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|1/1||||5|||||||-1|||,-|upstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||3826|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:17:13,4:6:5,0,31:..", + 
"chr16\t274052\tchr16_274052_A_G\tA\tG\t28\t.\tAF=1;AQ=28;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|synonymous_variant&NMD_transcript_variant|LOW|RGS11|ENSG00000076344|Transcript|ENST00000168869|nonsense_mediated_decay|5/15||||353|250|84|L|Tta/Cta|||-1||HGNC|HGNC:9993,G|downstream_gene_variant|MODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000301679|protein_coding|||||||||||3972|1||HGNC|HGNC:14163,G|synonymous_variant|LOW|RGS11|ENSG00000076344|Transcript|ENST00000316163|protein_coding|6/17||||434|357|119|D|gaT/gaC|||-1||HGNC|HGNC:9993,G|synonymous_variant|LOW|RGS11|ENSG00000076344|Transcript|ENST00000359740|protein_coding|5/16||||387|387|129|D|gaT/gaC|||-1||HGNC|HGNC:9993,G|synonymous_variant|LOW|RGS11|ENSG00000076344|Transcript|ENST00000397770|protein_coding|6/17||||453|420|140|D|gaT/gaC|||-1||HGNC|HGNC:9993,G|downstream_gene_variant|MODIFIER|RGS11|ENSG00000076344|Transcript|ENST00000472466|retained_intron|||||||||||887|-1||HGNC|HGNC:9993,G|non_coding_transcript_exon_variant|MODIFIER|RGS11|ENSG00000076344|Transcript|ENST00000477143|retained_intron|1/9||||905|||||||-1||HGNC|HGNC:9993,G|non_coding_transcript_exon_variant|MODIFIER|RGS11|ENSG00000076344|Transcript|ENST00000481672|retained_intron|2/11||||110|||||||-1||HGNC|HGNC:9993,G|non_coding_transcript_exon_variant|MODIFIER|RGS11|ENSG00000076344|Transcript|ENST00000493449|retained_intron|5/9||||1199|||||||-1||HGNC|HGNC:9993,G|downstream_gene_variant|MODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000600536|nonsense_mediated_decay|||||||||||4109|1||HGNC|HGNC:14163,G|downstream_gene_variant|MODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000653392|nonsense_mediated_decay|||||||||||4216|1||HGNC|HGNC:14163,G|downstream_gene_variant|MODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000654053|nonsense_mediated_decay|||||||||||4154|1||HGNC|HGNC:14163,G|downstream_gene_variant|MODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000659283|nonsense_mediated_decay|||||||||||1869|1||HGNC|HGNC:14163,G|downstream_gene_variant|MOD
IFIER|FAM234A|ENSG00000167930|Transcript|ENST00000666018|nonsense_mediated_decay|||||||||||4097|1||HGNC|HGNC:14163\tGT:DP:AD:GQ:PL:RNC\t1/1:32:0,32:26:28,29,0:..", + "chr16\t286660\tchr16_286660_G_A\tG\tA\t25\t.\tAF=1;AQ=25;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|synonymous_variant|LOW|PDIA2|ENSG00000185615|Transcript|ENST00000219406|protein_coding|9/11||||1353|1347|449|T|acG/acA|||1||HGNC|HGNC:14180,A|downstream_gene_variant|MODIFIER|ARHGDIG|ENSG00000242173|Transcript|ENST00000219409|protein_coding|||||||||||3650|1||HGNC|HGNC:680,A|downstream_gene_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000262320|protein_coding|||||||||||780|-1||HGNC|HGNC:903,A|downstream_gene_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000354866|protein_coding|||||||||||781|-1||HGNC|HGNC:903,A|synonymous_variant|LOW|PDIA2|ENSG00000185615|Transcript|ENST00000404312|protein_coding|9/11||||1343|1338|446|T|acG/acA|||1||HGNC|HGNC:14180,A|downstream_gene_variant|MODIFIER|ARHGDIG|ENSG00000242173|Transcript|ENST00000414650|protein_coding|||||||||||3850|1|cds_end_NF|HGNC|HGNC:680,A|synonymous_variant|LOW|PDIA2|ENSG00000185615|Transcript|ENST00000435833|protein_coding|3/5||||234|234|78|T|acG/acA|||1|cds_start_NF|HGNC|HGNC:14180,A|downstream_gene_variant|MODIFIER|ARHGDIG|ENSG00000242173|Transcript|ENST00000447871|protein_coding|||||||||||3972|1|cds_end_NF|HGNC|HGNC:680,A|downstream_gene_variant|MODIFIER|PDIA2|ENSG00000185615|Transcript|ENST00000456379|protein_coding|||||||||||1520|1|cds_start_NF&cds_end_NF|HGNC|HGNC:14180,A|downstream_gene_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000457798|protein_coding|||||||||||1530|-1|cds_start_NF|HGNC|HGNC:903,A|downstream_gene_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000461023|retained_intron|||||||||||780|-1||HGNC|HGNC:903,A|downstream_gene_variant|MODIFIER|PDIA2|ENSG00000185615|Transcript|ENST00000462950|protein_coding_CDS_not_defined|||||||||||1084|1||HGNC|HGNC:14180,A|non_coding_transcript_exon_variant|MODIFIER
|PDIA2|ENSG00000185615|Transcript|ENST00000467212|retained_intron|8/10||||1418|||||||1||HGNC|HGNC:14180,A|downstream_gene_variant|MODIFIER|ARHGDIG|ENSG00000242173|Transcript|ENST00000477621|retained_intron|||||||||||3820|1||HGNC|HGNC:680,A|non_coding_transcript_exon_variant|MODIFIER|PDIA2|ENSG00000185615|Transcript|ENST00000482665|retained_intron|5/7||||1630|||||||1||HGNC|HGNC:14180\tGT:DP:AD:GQ:PL:RNC\t1/1:22:0,22:19:25,19,0:..", + "chr16\t309953\tchr16_309953_A_G\tA\tG\t33\t.\tAF=0.5;AQ=33;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000262320|protein_coding||4/10||||||||||-1||HGNC|HGNC:903,G|intron_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000354866|protein_coding||4/9||||||||||-1||HGNC|HGNC:903,G|intron_variant&non_coding_transcript_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000461023|retained_intron||3/7||||||||||-1||HGNC|HGNC:903,G|intron_variant&non_coding_transcript_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000481769|protein_coding_CDS_not_defined||3/4||||||||||-1||HGNC|HGNC:903\tGT:DP:AD:GQ:PL:RNC\t0/1:11:6,5:33:33,0,44:..", + "chr16\t346264\tchr16_346264_A_G\tA\tG\t31\t.\tAF=0.5;AQ=31;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|synonymous_variant|LOW|AXIN1|ENSG00000103126|Transcript|ENST00000262320|protein_coding|2/11||||1198|762|254|D|gaT/gaC|||-1||HGNC|HGNC:903,G|synonymous_variant|LOW|AXIN1|ENSG00000103126|Transcript|ENST00000354866|protein_coding|2/10||||924|762|254|D|gaT/gaC|||-1||HGNC|HGNC:903,G|non_coding_transcript_exon_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000461023|retained_intron|1/8||||59|||||||-1||HGNC|HGNC:903,G|intron_variant&non_coding_transcript_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000481769|protein_coding_CDS_not_defined||1/4||||||||||-1||HGNC|HGNC:903\tGT:DP:AD:GQ:PL:RNC\t0/1:26:15,11:31:31,0,40:..", + 
"chr16\t377013\tchr16_377013_G_C\tG\tC\t28\t.\tAF=0.5;AQ=28;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000250930|protein_coding||4/12||||||||||-1||HGNC|HGNC:17205,C|upstream_gene_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000424078|protein_coding|||||||||||2162|-1|cds_start_NF|HGNC|HGNC:17205,C|intron_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000427313|protein_coding||4/4||||||||||-1|cds_end_NF|HGNC|HGNC:17205,C|intron_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000431232|protein_coding||4/12||||||||||-1||HGNC|HGNC:17205,C|upstream_gene_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000448854|protein_coding|||||||||||1798|-1|cds_start_NF|HGNC|HGNC:17205,C|upstream_gene_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000467452|retained_intron|||||||||||1938|-1||HGNC|HGNC:17205,C|upstream_gene_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000475348|retained_intron|||||||||||538|-1||HGNC|HGNC:17205,C|downstream_gene_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000476735|protein_coding_CDS_not_defined|||||||||||95|-1||HGNC|HGNC:17205\tGT:DP:AD:GQ:PL:RNC\t0/1:17:11,6:29:28,0,42:..", + 
"chr16\t520623\tchr16_520623_G_A\tG\tA\t34\t.\tAF=0.5;AQ=34;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000262305|protein_coding||13/13||||||||||1||HGNC|HGNC:17224,A|intron_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000434585|protein_coding||14/14||||||||||1||HGNC|HGNC:17224,A|intron_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000450428|protein_coding||11/11||||||||||1||HGNC|HGNC:17224,A|downstream_gene_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000461009|retained_intron|||||||||||1515|1||HGNC|HGNC:17224,A|downstream_gene_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000464263|retained_intron|||||||||||861|1||HGNC|HGNC:17224,A|downstream_gene_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000487899|retained_intron|||||||||||1605|1||HGNC|HGNC:17224,A|downstream_gene_variant|MODIFIER|LINC00235|ENSG00000277142|Transcript|ENST00000622160|lncRNA|||||||||||4532|-1||HGNC|HGNC:14138\tGT:DP:AD:GQ:PL:RNC\t0/1:18:9,9:34:34,0,44:..", + 
"chr16\t551632\tchr16_551632_T_C\tT\tC\t21\t.\tAF=1;AQ=21;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|synonymous_variant|LOW|CAPN15|ENSG00000103326|Transcript|ENST00000219611|protein_coding|9/14||||2820|2313|771|G|ggT/ggC|||1||HGNC|HGNC:11182,C|downstream_gene_variant|MODIFIER|CAPN15|ENSG00000103326|Transcript|ENST00000562370|protein_coding|||||||||||4240|1|cds_end_NF|HGNC|HGNC:11182,C|upstream_gene_variant|MODIFIER|CAPN15|ENSG00000103326|Transcript|ENST00000565010|retained_intron|||||||||||665|1||HGNC|HGNC:11182,C|intron_variant&non_coding_transcript_variant|MODIFIER||ENSG00000261691|Transcript|ENST00000565879|lncRNA||1/1||||||||||-1|||,C|upstream_gene_variant|MODIFIER|CAPN15|ENSG00000103326|Transcript|ENST00000566977|protein_coding|||||||||||1123|1|cds_start_NF|HGNC|HGNC:11182,C|downstream_gene_variant|MODIFIER|CAPN15|ENSG00000103326|Transcript|ENST00000568988|protein_coding|||||||||||3423|1|cds_end_NF|HGNC|HGNC:11182,C|downstream_gene_variant|MODIFIER|CAPN15|ENSG00000103326|Transcript|ENST00000637507|protein_coding|||||||||||3601|1|cds_end_NF|HGNC|HGNC:11182\tGT:DP:AD:GQ:PL:RNC\t1/1:13:0,13:15:21,16,0:..", + 
"chr16\t582180\tchr16_582180_G_C\tG\tC\t25\t.\tAF=1;AQ=25;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000026218|protein_coding||9/9||||||||||1||HGNC|HGNC:14135,C|intron_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000321878|protein_coding||9/10||||||||||1||HGNC|HGNC:14135,C|intron_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000409527|protein_coding||10/11||||||||||1||HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000420990|nonsense_mediated_decay||6/7||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000422307|protein_coding|||||||||||4946|1||HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000443147|nonsense_mediated_decay||10/11||||||||||1||HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000470411|protein_coding|||||||||||4963|1||HGNC|HGNC:14135,C|non_coding_transcript_exon_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000476438|retained_intron|1/2||||420|||||||1||HGNC|HGNC:14135,C|intron_variant&non_coding_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000480424|retained_intron||3/4||||||||||1||HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000537901|retained_intron|||||||||||3378|1||HGNC|HGNC:14135,C|intron_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000540241|protein_coding||2/3||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|upstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000540548|protein_coding_CDS_not_defined|||||||||||310|1||HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000544860|protein_coding_CDS_not_defined|||||||||||3337|1||HGNC|HGNC:14135,C|upstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000634341|retained_int
ron|||||||||||697|1||HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000635205|nonsense_mediated_decay||3/4||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000635909|nonsense_mediated_decay||8/9||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000635935|protein_coding_CDS_not_defined|||||||||||1403|1||HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000636005|nonsense_mediated_decay||8/9||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000636657|nonsense_mediated_decay||10/11||||||||||1||HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000637468|nonsense_mediated_decay||6/7||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000638143|nonsense_mediated_decay|||||||||||1258|1|cds_start_NF|HGNC|HGNC:14135,C|upstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000638152|protein_coding_CDS_not_defined|||||||||||1404|1||HGNC|HGNC:14135\tGT:DP:AD:GQ:PL:RNC\t1/1:6:0,6:9:25,8,0:..", + 
"chr16\t627629\tchr16_627629_T_C\tT\tC\t20\t.\tAF=1;AQ=20;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|3_prime_UTR_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000248139|protein_coding|6/6||||1079|||||||1||HGNC|HGNC:18285,C|upstream_gene_variant|MODIFIER|WFIKKN1|ENSG00000127578|Transcript|ENST00000319070|protein_coding|||||||||||3356|1||HGNC|HGNC:30912,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000509637|nonsense_mediated_decay|||||||||||1636|1|cds_start_NF|HGNC|HGNC:18285,C|3_prime_UTR_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000535977|protein_coding|7/7||||1075|||||||1||HGNC|HGNC:18285,C|3_prime_UTR_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000538492|protein_coding|7/7||||927|||||||1||HGNC|HGNC:18285,C|3_prime_UTR_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000539661|protein_coding|7/7||||967|||||||1||HGNC|HGNC:18285,C|non_coding_transcript_exon_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000561781|retained_intron|2/2||||796|||||||1||HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000563109|protein_coding|||||||||||46|1|cds_start_NF&cds_end_NF|HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000564703|protein_coding|||||||||||1512|1|cds_end_NF|HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000565511|nonsense_mediated_decay|||||||||||1546|1||HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000566290|protein_coding|||||||||||214|1|cds_end_NF|HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000568586|protein_coding|||||||||||1508|1|cds_end_NF|HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000569575|protein_coding|||||||||||1603|1|cds_end_NF|HGNC|HGNC:18285,C|upstream_gene_variant|MODIFIER|WFIKKN1|ENSG00000127578|Transcript|
ENST00000573440|retained_intron|||||||||||1610|1||HGNC|HGNC:30912\tGT:DP:AD:GQ:PL:RNC\t1/1:13:0,13:13:20,13,0:..", + "chr16\t638557\tchr16_638557_A_C\tA\tC\t24\t.\tAF=1;AQ=24;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000301686|protein_coding|||||||||||2252|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000307650|protein_coding|||||||||||3271|1||HGNC|HGNC:14142,C|downstream_gene_variant|MODIFIER|WFIKKN1|ENSG00000127578|Transcript|ENST00000319070|protein_coding|||||||||||4440|1||HGNC|HGNC:30912,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000338401|protein_coding|||||||||||2215|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000397664|protein_coding|||||||||||2206|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000397665|protein_coding|||||||||||2267|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000397666|protein_coding|||||||||||2255|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000448973|retained_intron|||||||||||2247|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000456420|nonsense_mediated_decay|||||||||||2318|-1|cds_start_NF|HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000474840|nonsense_mediated_decay|||||||||||3372|1||HGNC|HGNC:14142,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000491999|nonsense_mediated_decay|||||||||||3289|1||HGNC|HGNC:14142,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000564039|nonsense_mediated_decay|||||||||||2279|-1|cds_start_NF|HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000565163|protein_coding|||||||||||2297|-1|cds_start
_NF&cds_end_NF|HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000565799|retained_intron|||||||||||2290|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000568077|nonsense_mediated_decay|||||||||||2226|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000568773|protein_coding|||||||||||2322|-1|cds_start_NF|HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000568830|protein_coding|||||||||||2199|-1|cds_end_NF|HGNC|HGNC:14141,C|downstream_gene_variant|MODIFIER|WFIKKN1|ENSG00000127578|Transcript|ENST00000573440|retained_intron|||||||||||4441|1||HGNC|HGNC:30912,C|intron_variant&non_coding_transcript_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000611328|protein_coding_CDS_not_defined||1/3||||||||||1||HGNC|HGNC:14142,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000614890|protein_coding|||||||||||2191|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000615744|protein_coding_CDS_not_defined|||||||||||3894|1||HGNC|HGNC:14142,C|non_coding_transcript_exon_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000619114|protein_coding_CDS_not_defined|2/2||||576|||||||1||HGNC|HGNC:14142,C|non_coding_transcript_exon_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000619377|protein_coding_CDS_not_defined|2/2||||664|||||||1||HGNC|HGNC:14142,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000620462|retained_intron|||||||||||3926|1||HGNC|HGNC:14142,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000629534|protein_coding|||||||||||3292|1||HGNC|HGNC:14142\tGT:DP:AD:GQ:PL:RNC\t1/1:14:0,14:16:24,17,0:..", + 
"chr16\t655844\tchr16_655844_T_C\tT\tC\t23\t.\tAF=1;AQ=23;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000293879|protein_coding|17/41||||1973|1921|641|L|Ttg/Ctg|||1||HGNC|HGNC:26960,C|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000420061|retained_intron|17/17||||1985|||||||1||HGNC|HGNC:26960,C|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000546516|retained_intron|3/20||||614|||||||1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000547407|retained_intron|||||||||||431|1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000548844|protein_coding_CDS_not_defined|||||||||||2716|1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000548859|retained_intron|||||||||||589|1||HGNC|HGNC:26960,C|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000549091|protein_coding|17/41||||2013|1921|641|L|Ttg/Ctg|||1||HGNC|HGNC:26960,C|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000549648|retained_intron|17/17||||1988|||||||1||HGNC|HGNC:26960,C|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000550593|retained_intron|||||||||||2374|1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000550739|protein_coding|||||||||||3401|1|cds_start_NF&cds_end_NF|HGNC|HGNC:26960,C|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552648|protein_coding_CDS_not_defined|4/7||||455|||||||1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552683|retained_intron|||||||||||2014|1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552728|nonsense_mediated_decay|||||||||||577|1|cds_start_NF|HGNC|HGNC:26960,C|non_coding_transcript_exon_va
riant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552943|retained_intron|15/26||||3591|||||||1||HGNC|HGNC:26960,C|non_coding_transcript_exon_variant|MODIFIER||ENSG00000262528|Transcript|ENST00000573609|lncRNA|1/2||||351|||||||-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:14:0,14:16:23,17,0:..", + "chr16\t667523\tchr16_667523_G_T\tG\tT\t27\t.\tAF=1;AQ=27;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000293879|protein_coding|41/41||||5233|5181|1727|P|ccG/ccT|||1||HGNC|HGNC:26960,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000315082|protein_coding|||||||||||609|1||HGNC|HGNC:21169,T|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000315764|protein_coding|6/6||||1147|834|278|P|ccG/ccT|||1||HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000546516|retained_intron|||||||||||4090|1||HGNC|HGNC:26960,T|missense_variant|MODERATE|WDR90|ENSG00000161996|Transcript|ENST00000546896|protein_coding|2/2||||89|89|30|R/L|cGt/cTt|||1|cds_start_NF|HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000546923|retained_intron|||||||||||4787|1||HGNC|HGNC:26960,T|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000547407|retained_intron|24/24||||3541|||||||1||HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000547543|protein_coding_CDS_not_defined|||||||||||778|1||HGNC|HGNC:26960,T|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000547944|protein_coding|7/7||||1365|978|326|P|ccG/ccT|||1||HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000548448|retained_intron|||||||||||1162|1||HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000548603|protein_coding_CDS_not_defined|||||||||||4086|1||HGNC|HGNC:26960,T|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000
161996|Transcript|ENST00000549024|retained_intron|12/12||||1951|||||||1||HGNC|HGNC:26960,T|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000549091|protein_coding|41/41||||5279|5187|1729|P|ccG/ccT|||1||HGNC|HGNC:26960,T|3_prime_UTR_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000551100|protein_coding|3/3||||315|||||||1|cds_start_NF|HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552683|retained_intron|||||||||||4085|1||HGNC|HGNC:26960,T|3_prime_UTR_variant&NMD_transcript_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552728|nonsense_mediated_decay|22/22||||2826|||||||1|cds_start_NF|HGNC|HGNC:26960,T|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000553080|retained_intron|6/6||||1635|||||||1||HGNC|HGNC:26960,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000561711|retained_intron|||||||||||4394|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000561929|protein_coding|||||||||||686|1|cds_start_NF&cds_end_NF|HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000561983|nonsense_mediated_decay|||||||||||582|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000562333|nonsense_mediated_decay|||||||||||664|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000562598|retained_intron|||||||||||659|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000562708|retained_intron|||||||||||589|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000563134|protein_coding|||||||||||583|1|cds_end_NF|HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000563637|nonsense_mediated_decay|||||||||||628|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Trans
cript|ENST00000563776|retained_intron|||||||||||582|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000565004|retained_intron|||||||||||3559|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000566214|nonsense_mediated_decay|||||||||||628|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000566965|retained_intron|||||||||||1037|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000567017|retained_intron|||||||||||639|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000567589|retained_intron|||||||||||3005|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000568636|retained_intron|||||||||||1135|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000568950|retained_intron|||||||||||655|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000569358|retained_intron|||||||||||2932|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000569675|retained_intron|||||||||||612|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000569706|retained_intron|||||||||||609|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000569943|protein_coding_CDS_not_defined|||||||||||830|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000570092|retained_intron|||||||||||601|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000570280|nonsense_mediated_decay|||||||||||630|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000602564|retained_intron|||||||||||657|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Tran
script|ENST00000697194|protein_coding|||||||||||577|1||HGNC|HGNC:21169\tGT:DP:AD:GQ:PL:RNC\t1/1:22:0,22:23:27,24,0:..", + "chr16\t690466\tchr16_690466_A_G\tA\tG\t26\t.\tAF=1;AQ=26;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|upstream_gene_variant|MODIFIER|WDR24|ENSG00000127580|Transcript|ENST00000248142|protein_coding|||||||||||22|-1||HGNC|HGNC:20852,G|upstream_gene_variant|MODIFIER|WDR24|ENSG00000127580|Transcript|ENST00000293883|protein_coding|||||||||||68|-1||HGNC|HGNC:20852,G|downstream_gene_variant|MODIFIER|FBXL16|ENSG00000127585|Transcript|ENST00000397621|protein_coding|||||||||||2034|-1||HGNC|HGNC:14150,G|downstream_gene_variant|MODIFIER|FBXL16|ENSG00000127585|Transcript|ENST00000562563|protein_coding|||||||||||2461|-1||HGNC|HGNC:14150,G|downstream_gene_variant|MODIFIER|FBXL16|ENSG00000127585|Transcript|ENST00000562585|protein_coding_CDS_not_defined|||||||||||4192|-1||HGNC|HGNC:14150,G|downstream_gene_variant|MODIFIER|FBXL16|ENSG00000127585|Transcript|ENST00000562648|retained_intron|||||||||||2032|-1||HGNC|HGNC:14150,G|intron_variant&non_coding_transcript_variant|MODIFIER||ENSG00000261659|Transcript|ENST00000566927|lncRNA||1/1||||||||||1|||,G|upstream_gene_variant|MODIFIER|WDR24|ENSG00000127580|Transcript|ENST00000567014|retained_intron|||||||||||3537|-1||HGNC|HGNC:20852,G|non_coding_transcript_exon_variant|MODIFIER||ENSG00000261659|Transcript|ENST00000575305|retained_intron|1/1||||1466|||||||1|||,G|upstream_gene_variant|MODIFIER|WDR24|ENSG00000127580|Transcript|ENST00000647644|protein_coding|||||||||||219|-1||HGNC|HGNC:20852\tGT:DP:AD:GQ:PL:RNC\t1/1:17:0,17:20:26,21,0:..", + 
"chr16\t723341\tchr16_723341_G_A\tG\tA\t23\t.\tAF=1;AQ=23;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000219535|protein_coding|||||||||||768|1||HGNC|HGNC:14152,A|intron_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000293889|protein_coding||11/13||||||||||-1||HGNC|HGNC:14153,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000341413|protein_coding|||||||||||3617|1||HGNC|HGNC:14177,A|intron_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000345165|protein_coding||11/13||||||||||-1||HGNC|HGNC:14153,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000389701|retained_intron|||||||||||4064|1||HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000389703|protein_coding|||||||||||3765|1||HGNC|HGNC:14177,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000423653|protein_coding_CDS_not_defined|||||||||||1586|-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000439619|retained_intron|||||||||||1209|-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000460023|retained_intron|||||||||||1566|-1||HGNC|HGNC:14153,A|intron_variant&non_coding_transcript_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000463539|retained_intron||9/11||||||||||-1||HGNC|HGNC:14153,A|intron_variant&non_coding_transcript_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000466708|retained_intron||10/12||||||||||-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000471861|retained_intron|||||||||||1660|-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000474647|retained_intron|||||||||||1782|-1||HGNC|HGNC:14153,A|non_coding_transcript_exon_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000478979|retained_intron|4/4|
|||2296|||||||-1||HGNC|HGNC:14153,A|non_coding_transcript_exon_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000481804|retained_intron|4/5||||2627|||||||-1||HGNC|HGNC:14153,A|intron_variant&non_coding_transcript_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000482152|retained_intron||3/4||||||||||-1||HGNC|HGNC:14153,A|non_coding_transcript_exon_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000482878|retained_intron|4/4||||2699|||||||-1||HGNC|HGNC:14153,A|intron_variant&non_coding_transcript_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000485091|retained_intron||10/11||||||||||-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000538176|retained_intron|||||||||||1201|-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000544996|retained_intron|||||||||||1643|-1||HGNC|HGNC:14153,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000549114|protein_coding|||||||||||3595|1||HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000561546|protein_coding|||||||||||3905|1||HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000561750|retained_intron|||||||||||3934|1||HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000562141|protein_coding|||||||||||3778|1|cds_end_NF|HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000562187|protein_coding|||||||||||3765|1|cds_end_NF|HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000563792|protein_coding|||||||||||4398|1|cds_end_NF|HGNC|HGNC:14177,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000564000|protein_coding|||||||||||756|1||HGNC|HGNC:14152,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000564537|protein_coding|||||||||||3765|1||HGNC|HGNC:14177
,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000564545|protein_coding|||||||||||3921|1||HGNC|HGNC:14177,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000564640|retained_intron|||||||||||1029|1||HGNC|HGNC:14152,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000566437|retained_intron|||||||||||833|1||HGNC|HGNC:14152,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000566525|retained_intron|||||||||||752|1||HGNC|HGNC:14152,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000567414|protein_coding|||||||||||3952|1|cds_end_NF|HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000567696|retained_intron|||||||||||3977|1||HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000568141|protein_coding|||||||||||4005|1|cds_end_NF|HGNC|HGNC:14177,A|downstream_gene_variant|MODIFIER|METRN|ENSG00000103260|Transcript|ENST00000568223|protein_coding|||||||||||3686|1||HGNC|HGNC:14151,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000568916|protein_coding|||||||||||751|1|cds_start_NF|HGNC|HGNC:14152,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000569143|retained_intron|||||||||||3977|1||HGNC|HGNC:14177,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000569529|protein_coding|||||||||||751|1||HGNC|HGNC:14152,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000570237|retained_intron|||||||||||751|1||HGNC|HGNC:14152,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000650995|protein_coding|||||||||||2453|-1|cds_end_NF|HGNC|HGNC:14153,A|intron_variant&non_coding_transcript_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000682391|retained_intron||7/9||||||||||-1||HGNC|HGNC:14153\tGT:DP:AD:GQ:PL:RNC\t1/1:17:0,17:19:23,21,0:..", + 
"chr16\t739761\tchr16_739761_A_G\tA\tG\t22\t.\tAF=1;AQ=22;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000251588|protein_coding||1/10||||||||||-1||HGNC|HGNC:14179,G|5_prime_UTR_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000540986|protein_coding|1/10||||1174|||||||-1||HGNC|HGNC:14179,G|upstream_gene_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000562421|protein_coding|||||||||||2006|-1|cds_end_NF|HGNC|HGNC:14179,G|intron_variant&non_coding_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000562752|retained_intron||1/4||||||||||-1||HGNC|HGNC:14179,G|upstream_gene_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000563534|protein_coding_CDS_not_defined|||||||||||2297|-1||HGNC|HGNC:14179,G|intron_variant&NMD_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000565065|nonsense_mediated_decay||2/5||||||||||-1||HGNC|HGNC:14179,G|intron_variant&non_coding_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000565341|retained_intron||1/3||||||||||-1||HGNC|HGNC:14179,G|intron_variant&NMD_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000565425|nonsense_mediated_decay||1/9||||||||||-1||HGNC|HGNC:14179,G|intron_variant&non_coding_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000565693|protein_coding_CDS_not_defined||1/2||||||||||-1||HGNC|HGNC:14179,G|intron_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000566614|protein_coding||1/4||||||||||-1|cds_end_NF|HGNC|HGNC:14179,G|intron_variant&non_coding_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000567172|protein_coding_CDS_not_defined||1/1||||||||||-1||HGNC|HGNC:14179,G|upstream_gene_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000567403|protein_coding|||||||||||2009|-1|cds_end_NF|HGNC|HGNC:14179,G|intron_variant&non_coding_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000567455|protein_coding_
CDS_not_defined||1/2||||||||||-1||HGNC|HGNC:14179,G|upstream_gene_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000568545|protein_coding|||||||||||1872|-1||HGNC|HGNC:14179,G|downstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000569604|protein_coding_CDS_not_defined|||||||||||4236|1||HGNC|HGNC:14177,G|upstream_gene_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000569759|protein_coding_CDS_not_defined|||||||||||3391|-1||HGNC|HGNC:14179,G|intron_variant&NMD_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000570066|nonsense_mediated_decay||1/4||||||||||-1||HGNC|HGNC:14179,G|intron_variant&NMD_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000570289|nonsense_mediated_decay||1/3||||||||||-1||HGNC|HGNC:14179,G|downstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000647875|protein_coding_CDS_not_defined|||||||||||4408|1||HGNC|HGNC:14177\tGT:DP:AD:GQ:PL:RNC\t1/1:14:0,14:14:22,14,0:..", + "chr16\t766294\tchr16_766294_T_G\tT\tG\t29\t.\tAF=0.5;AQ=29;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000382862|protein_coding||11/16||||||||||1||HGNC|HGNC:7371,G|upstream_gene_variant|MODIFIER|MIR662|ENSG00000207579|Transcript|ENST00000384847|miRNA|||||||||||3889|1||HGNC|HGNC:32918,G|downstream_gene_variant|MODIFIER|MSLNL|ENSG00000162006|Transcript|ENST00000543963|protein_coding|||||||||||3134|-1|cds_start_NF|HGNC|HGNC:14170,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000545450|protein_coding||12/17||||||||||1||HGNC|HGNC:7371,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000561896|protein_coding||2/6||||||||||1|cds_start_NF&cds_end_NF|HGNC|HGNC:7371,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000563651|protein_coding||10/10||||||||||1|cds_end_NF|HGNC|HGNC:7371,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000563941|protein_coding||12/17||||||||||1||HGNC|HGNC:7371
,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000566269|protein_coding||2/7||||||||||1|cds_start_NF|HGNC|HGNC:7371,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000566549|protein_coding||11/16||||||||||1||HGNC|HGNC:7371,G|downstream_gene_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000569566|protein_coding|||||||||||1258|1|cds_end_NF|HGNC|HGNC:7371,G|downstream_gene_variant|MODIFIER||ENSG00000279136|Transcript|ENST00000623562|TEC|||||||||||2825|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:12,7:29:29,0,43:..", + "chr16\t789996\tchr16_789996_T_C\tT\tC\t36\t.\tAF=0.5;AQ=36;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000007264|protein_coding|||||||||||1615|-1||HGNC|HGNC:14173,C|intron_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000262315|protein_coding||4/21||||||||||1||HGNC|HGNC:18435,C|intron_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000317063|protein_coding||4/21||||||||||1||HGNC|HGNC:18435,C|splice_polypyrimidine_tract_variant&intron_variant|LOW|CHTF18|ENSG00000127586|Transcript|ENST00000426047|protein_coding||2/6||||||||||1|cds_start_NF&cds_end_NF|HGNC|HGNC:18435,C|intron_variant&NMD_transcript_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000440239|nonsense_mediated_decay||4/13||||||||||1||HGNC|HGNC:18435,C|intron_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000455171|protein_coding||3/20||||||||||1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000461268|protein_coding_CDS_not_defined|||||||||||2528|1||HGNC|HGNC:18435,C|intron_variant&non_coding_transcript_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000464728|retained_intron||3/17||||||||||1||HGNC|HGNC:18435,C|splice_polypyrimidine_tract_variant&intron_variant&non_coding_transcript_variant|LOW|CHTF18|ENSG00000127586|Transcript|ENST00000471202|retained_intron||4/18||||||||||1||HGNC|HGNC:18435,C|intron_variant&n
on_coding_transcript_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000479976|retained_intron||4/6||||||||||1||HGNC|HGNC:18435,C|intron_variant&non_coding_transcript_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000484349|retained_intron||1/3||||||||||1||HGNC|HGNC:18435,C|intron_variant&non_coding_transcript_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000491530|protein_coding_CDS_not_defined||2/5||||||||||1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000493715|protein_coding_CDS_not_defined|||||||||||4706|1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000561734|protein_coding|||||||||||2015|-1||HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000562070|protein_coding|||||||||||2527|-1|cds_start_NF|HGNC|HGNC:14173,C|downstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000563545|nonsense_mediated_decay|||||||||||360|1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000563560|protein_coding|||||||||||1667|-1|cds_end_NF|HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000565377|protein_coding|||||||||||1615|-1|cds_end_NF|HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000565503|nonsense_mediated_decay|||||||||||1656|-1||HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000565787|retained_intron|||||||||||1141|1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000565809|protein_coding|||||||||||1625|-1||HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000567114|protein_coding|||||||||||1599|-1||HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000567283|nonsense_mediated_decay|||||||||||1663|-1||HGNC|HGNC:14173,C
|upstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000567620|retained_intron|||||||||||703|1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000569270|nonsense_mediated_decay|||||||||||255|1|cds_start_NF|HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000569601|protein_coding|||||||||||1662|-1|cds_end_NF|HGNC|HGNC:14173,C|downstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000570058|protein_coding_CDS_not_defined|||||||||||656|1||HGNC|HGNC:18435,C|intron_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000631357|protein_coding||5/23||||||||||1||HGNC|HGNC:18435\tGT:DP:AD:GQ:PL:RNC\t0/1:15:7,8:36:36,0,49:..", + "chr16\t813336\tchr16_813336_T_C\tT\tC\t32\t.\tAF=0.5;AQ=32;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER||ENSG00000287855|Transcript|ENST00000655150|lncRNA|||||||||||4153|1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:14:5,9:32:32,0,39:..", + "chr16\t893361\tchr16_893361_T_C\tT\tC\t34\t.\tAF=0.5;AQ=34;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000262301|protein_coding||4/10||||||||||-1||HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000543238|protein_coding||1/7||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000545827|nonsense_mediated_decay||5/11||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000562226|nonsense_mediated_decay||6/7||||||||||-1||HGNC|HGNC:14154,C|upstream_gene_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000565198|retained_intron|||||||||||109|-1||HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000566627|protein_coding||3/5||||||||||-1|cds_end_NF|HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568897|protein_co
ding||3/9||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568964|nonsense_mediated_decay||4/5||||||||||-1|cds_start_NF|HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000570014|protein_coding||4/6||||||||||-1|cds_end_NF|HGNC|HGNC:14154\tGT:DP:AD:GQ:PL:RNC\t0/1:17:6,11:34:34,0,45:..", + "chr16\t947473\tchr16_947473_T_C\tT\tC\t29\t.\tAF=0.5;AQ=29;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000262301|protein_coding||2/10||||||||||-1||HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000543238|protein_coding||1/7||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000545827|nonsense_mediated_decay||2/11||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000562226|nonsense_mediated_decay||2/7||||||||||-1||HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000562380|protein_coding||2/3||||||||||-1|cds_start_NF|HGNC|HGNC:14154,C|5_prime_UTR_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000566627|protein_coding|1/6||||172|||||||-1|cds_end_NF|HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000567595|nonsense_mediated_decay||2/4||||||||||-1|cds_start_NF|HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568897|protein_coding||2/9||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568964|nonsense_mediated_decay||2/5||||||||||-1|cds_start_NF|HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000570014|protein_coding||2/6||||||||||-1|cds_end_NF|HGNC|HGNC:14154\tGT:DP:AD:GQ:PL:RNC\t0/1:23:8,15:30:29,0,47:..", + 
"chr16\t964119\tchr16_964119_A_G\tA\tG\t24\t.\tAF=1;AQ=24;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000262301|protein_coding||1/10||||||||||-1||HGNC|HGNC:14154,G|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000543238|protein_coding||1/7||||||||||-1||HGNC|HGNC:14154,G|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000545827|nonsense_mediated_decay||1/11||||||||||-1||HGNC|HGNC:14154,G|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000562226|nonsense_mediated_decay||1/7||||||||||-1||HGNC|HGNC:14154,G|missense_variant|MODERATE|LMF1|ENSG00000103227|Transcript|ENST00000562380|protein_coding|1/4||||4|4|2|Y/H|Tat/Cat|||-1|cds_start_NF|HGNC|HGNC:14154,G|intron_variant&non_coding_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000566609|protein_coding_CDS_not_defined||1/2||||||||||-1||HGNC|HGNC:14154,G|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000567595|nonsense_mediated_decay||1/4||||||||||-1|cds_start_NF|HGNC|HGNC:14154,G|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568897|protein_coding||1/9||||||||||-1||HGNC|HGNC:14154,G|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568964|nonsense_mediated_decay||1/5||||||||||-1|cds_start_NF|HGNC|HGNC:14154,G|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000570014|protein_coding||1/6||||||||||-1|cds_end_NF|HGNC|HGNC:14154,G|downstream_gene_variant|MODIFIER||ENSG00000276931|Transcript|ENST00000620075|lncRNA|||||||||||4256|-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:29:0,29:21:24,24,0:..", + 
"chr16\t977069\tchr16_977069_T_C\tT\tC\t34\t.\tAF=0.5;AQ=34;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|SOX8|ENSG00000005513|Transcript|ENST00000293894|protein_coding|||||||||||4701|1||HGNC|HGNC:11203,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000545827|nonsense_mediated_decay||1/11||||||||||-1||HGNC|HGNC:14154,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000562570|lncRNA|2/2||||1914|||||||-1||HGNC|HGNC:53928,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000563837|lncRNA|4/4||||423|||||||-1||HGNC|HGNC:53928,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000563863|lncRNA|4/4||||423|||||||-1||HGNC|HGNC:53928,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000565069|lncRNA|3/3||||265|||||||-1||HGNC|HGNC:53928,C|upstream_gene_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000565139|lncRNA|||||||||||143|-1||HGNC|HGNC:53928,C|intron_variant&non_coding_transcript_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000565467|lncRNA||1/1||||||||||-1||HGNC|HGNC:53928,C|upstream_gene_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000567961|lncRNA|||||||||||157|-1||HGNC|HGNC:53928,C|intron_variant&non_coding_transcript_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000568394|lncRNA||1/2||||||||||-1||HGNC|HGNC:53928,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000570014|protein_coding||1/6||||||||||-1|cds_end_NF|HGNC|HGNC:14154,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000655952|lncRNA|3/3||||743|||||||-1||HGNC|HGNC:53928,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000662104|lncRNA|3/3||||322|||||||-1||HGNC|HGNC:53928,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000669274|lncRNA|
3/3||||225|||||||-1||HGNC|HGNC:53928\tGT:DP:AD:GQ:PL:RNC\t0/1:17:6,11:34:34,0,44:.." + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T10:10:59.241809478" + } +} \ No newline at end of file diff --git a/subworkflows/local/snv_annotation/tests/nextflow.config b/subworkflows/local/snv_annotation/tests/nextflow.config new file mode 100644 index 00000000..2de8ad16 --- /dev/null +++ b/subworkflows/local/snv_annotation/tests/nextflow.config @@ -0,0 +1,71 @@ +process { + withName: 'DEEPVARIANT' { + ext.prefix = { intervals ? "${meta.id}_${intervals}_deepvariant" : "${meta.id}_deepvariant" } + ext.args = { [ + '--model_type WGS', + "--sample_name=${meta.id}", + '-vcf_stats_report=False' + ].join(' ') } + } + + withName: 'GLNEXUS' { + ext.args = '--config DeepVariant_unfiltered' + } + + withName: 'BCFTOOLS_CONCAT' { + ext.prefix = { "${meta.id}_concat" } + ext.args = [ + '--no-version', + '--allow-overlaps' + ].join(' ') + } + + withName: 'BCFTOOLS_NORM_MULTISAMPLE' { + ext.prefix = { "${meta.id}_norm_multisample" } + ext.args = [ + '--no-version', + '-m -', + '--output-type u', + '--write-index=csi', + '-w 10000' + ].join(' ') + } + + withName: 'BCFTOOLS_NORM_SINGLESAMPLE' { + ext.prefix = { "${meta.id}_norm_singlesample" } + ext.args = [ + '--no-version', + '-m -', + '-w 10000', + '--output-type u', + ].join(' ') + } + + withName: '.*:SHORT_VARIANT_CALLING:BCFTOOLS_FILLTAGS' { + ext.prefix = { "${meta.id}_ac" } + ext.args = [ + '--no-version', + '--output-type u' + ].join(' ') + } + + withName: 'BCFTOOLS_FILLTAGS_ANNO' { + ext.prefix = { "${meta.id}_filltags_anno" } + ext.args = [ + '--no-version', + '--output-type z' + ].join(' ') + } + + withName: 'MINIMAP2_ALIGN' { + ext.args = '-x map-hifi' + } + + withName: 'ENSEMBLVEP_VEP' { + ext.args = { [ + '--offline', + '--vcf', + '--compress_output bgzip' + ].join(' ') } + } +} diff --git a/subworkflows/local/structural_variant_calling.nf 
b/subworkflows/local/structural_variant_calling.nf index ace51636..be738c63 100644 --- a/subworkflows/local/structural_variant_calling.nf +++ b/subworkflows/local/structural_variant_calling.nf @@ -5,7 +5,6 @@ workflow STRUCTURAL_VARIANT_CALLING { take: ch_bam_bai // channel: [ val(meta), [[ bam ], [bai]] ] - ch_snfs ch_fasta ch_fai ch_tandem_repeats @@ -15,13 +14,10 @@ workflow STRUCTURAL_VARIANT_CALLING { SNIFFLES (ch_bam_bai, ch_fasta, ch_tandem_repeats, true, true) - // Combine sniffles output with supplied extra snfs SNIFFLES.out.snf - .map{ it [1] } - .concat(ch_snfs.map{ it[1] }) - .collect() - .sort{ it.name } - .map { snfs -> [ [id:'multisample'], snfs, [] ] } + .map { meta, snf -> [ [ 'id': meta.project ], snf ] } + .groupTuple() + .map { meta, snfs -> [ meta, snfs, [] ] } .set{ ch_multisample_input } SNIFFLES_MULTISAMPLE( ch_multisample_input, ch_fasta, ch_tandem_repeats, true, false ) diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf index 0b49178f..91cb7ac5 100644 --- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf @@ -35,50 +35,50 @@ include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' // Define subworkflows and their associated "--skip" // def workflowSkips = [ - assembly : "skip_assembly_wf", - qc : "skip_raw_read_qc", - mapping : "skip_mapping_wf", - snv_calling : "skip_short_variant_calling", - snv_annotation: "skip_snv_annotation", - call_paralogs : "skip_call_paralogs", - cnv_calling : "skip_cnv_calling", - phasing : "skip_phasing_wf", - repeat_calling: "skip_repeat_wf", - methylation : "skip_methylation_wf", + assembly : "skip_assembly_wf", + raw_read_qc : "skip_raw_read_qc", + aligned_read_qc : "skip_aligned_read_qc", + mapping : "skip_mapping_wf", + snv_calling : "skip_short_variant_calling", + snv_annotation : "skip_snv_annotation", + call_paralogs : "skip_call_paralogs", + 
cnv_calling : "skip_cnv_calling", + phasing : "skip_phasing_wf", + rank_variants : "skip_rank_variants", + repeat_calling : "skip_repeat_calling", + repeat_annotation: "skip_repeat_annotation", + methylation : "skip_methylation_wf", ] // // E.g., the CNV-calling workflow depends on mapping and snv_calling and can't run without them. // def workflowDependencies = [ - assembly : ["mapping"], - call_paralogs : ["mapping"], - snv_calling : ["mapping"], - snv_annotation : ["mapping", "snv_calling"], - cnv_calling : ["mapping", "snv_calling"], - phasing : ["mapping", "snv_calling"], - repeat_calling : ["mapping", "snv_calling", "phasing"], - methylation : ["mapping", "snv_calling", "phasing"], + aligned_read_qc : ["mapping"], + assembly : ["mapping"], + call_paralogs : ["mapping"], + snv_calling : ["mapping"], + snv_annotation : ["mapping", "snv_calling"], + cnv_calling : ["mapping", "snv_calling"], + phasing : ["mapping", "snv_calling"], + rank_variants : ["mapping", "snv_calling", "snv_annotation"], + repeat_calling : ["mapping", "snv_calling", "phasing"], + repeat_annotation: ["mapping", "snv_calling", "phasing", "repeat_calling"], + methylation : ["mapping", "snv_calling", "phasing"], ] // -// E.g., the dipcall_par file is required by the assembly workflow and the assembly workflow can't run without dipcall_par +// E.g., the par_regions file is required by the assembly workflow and the assembly workflow can't run without par_regions // def fileDependencies = [ - mapping : ["fasta", "somalier_sites"], - assembly : ["fasta"], // The assembly workflow should be split into two - assembly and variant calling (requires ref) - assembly : ["dipcall_par"], - snv_annotation: ["snp_db", "vep_cache"], - cnv_calling : ["hificnv_xy", "hificnv_xx", "hificnv_exclude"], - repeat_calling: ["trgt_repeats"] -] - -// -// E.g., pacbio can't run with the methylation workflow -// -def presetIncompatibilities = [ - pacbio : ["methylation"], - ONT_R10: ["assembly", "cnv_calling"], + mapping : 
["fasta", "somalier_sites"], + assembly : ["fasta", "par_regions"], // The assembly workflow should be split into two - assembly and variant calling (requires ref) + snv_calling : ["fasta", "par_regions"], + snv_annotation : ["snp_db", "vep_cache", "vep_plugin_files", "variant_consequences_snv"], + cnv_calling : ["hificnv_xy", "hificnv_xx", "hificnv_exclude"], + rank_variants : ["reduced_penetrance", "score_config_snv"], + repeat_calling : ["trgt_repeats"], + repeat_annotation: ["variant_catalog"], ] def parameterStatus = [ @@ -86,29 +86,32 @@ def parameterStatus = [ skip_short_variant_calling: params.skip_short_variant_calling, skip_phasing_wf : params.skip_phasing_wf, skip_methylation_wf : params.skip_methylation_wf, - skip_repeat_wf : params.skip_repeat_wf, + skip_rank_variants : params.skip_rank_variants, + skip_repeat_calling : params.skip_repeat_calling, + skip_repeat_annotation : params.skip_repeat_annotation, skip_snv_annotation : params.skip_snv_annotation, skip_call_paralogs : params.skip_call_paralogs, skip_cnv_calling : params.skip_cnv_calling, skip_mapping_wf : params.skip_mapping_wf, - skip_qc : params.skip_qc, + skip_aligned_read_qc : params.skip_aligned_read_qc, + skip_raw_read_qc : params.skip_raw_read_qc, skip_assembly_wf : params.skip_assembly_wf, ], files: [ - dipcall_par : params.dipcall_par, - snp_db : params.snp_db, - somalier_sites : params.somalier_sites, - vep_cache : params.vep_cache, - hificnv_xy : params.hificnv_xy, - hificnv_xx : params.hificnv_xx, - hificnv_exclude: params.hificnv_exclude, - fasta : params.fasta, - trgt_repeats : params.trgt_repeats, - ], - preset: [ - pacbio : params.preset == "pacbio", - revio : params.preset == "revio", - ONT_R10: params.preset == "ONT_R10", + par_regions : params.par_regions, + snp_db : params.snp_db, + somalier_sites : params.somalier_sites, + vep_cache : params.vep_cache, + hificnv_xy : params.hificnv_xy, + hificnv_xx : params.hificnv_xx, + hificnv_exclude : params.hificnv_exclude, + fasta : 
params.fasta, + trgt_repeats : params.trgt_repeats, + variant_catalog : params.variant_catalog, + score_config_snv : params.score_config_snv, + reduced_penetrance : params.reduced_penetrance, + vep_plugin_files : params.vep_plugin_files, + variant_consequences_snv: params.variant_consequences_snv, ] ] @@ -165,7 +168,7 @@ workflow PIPELINE_INITIALISATION { // // Custom validation for pipeline parameters // - validateInputParameters(parameterStatus, workflowSkips, workflowDependencies, fileDependencies, presetIncompatibilities) + validateInputParameters(parameterStatus, workflowSkips, workflowDependencies, fileDependencies) // // Create channel from input file provided through params.input @@ -181,7 +184,7 @@ workflow PIPELINE_INITIALISATION { .groupTuple() // group by sample .map { sample, metas, reads -> // Add number of files per sample _after_ splitting to meta - [ sample, metas[0] + [n_files: metas.size() + metas.size() * Math.max(0, params.split_fastq - 1), single_end:true ], reads ] + [ sample, metas[0] + [n_files: metas.size() + metas.size() * Math.max(0, params.parallel_alignments - 1), single_end:true ], reads ] } // Convert back to [ meta, reads ] .flatMap { @@ -190,6 +193,25 @@ workflow PIPELINE_INITIALISATION { } .set { ch_samplesheet } + // Check that there's samples with affected phenotype if we are ranking variants + ch_samplesheet + .filter { meta, reads -> meta.phenotype == 2 } + .ifEmpty { + if(!params.skip_rank_variants) { + error("No samples in samplesheet has affected phenotype (=2), --skip_rank_variants has to be active.") + } + } + + // Check that there's no more than one project + // TODO: Try to do this in nf-schema + ch_samplesheet + .map { meta, reads -> meta.project } + .unique() + .collect() + .filter{ it.size() == 1 } + .ifEmpty { + error("Only one project may be specified per run") + } emit: samplesheet = ch_samplesheet versions = ch_versions @@ -245,9 +267,9 @@ workflow PIPELINE_COMPLETION { // Check and validate pipeline parameters
// -def validateInputParameters(statusMap, workflowMap, workflowDependencies, fileDependencies, presetDependencies) { +def validateInputParameters(statusMap, workflowMap, workflowDependencies, fileDependencies) { genomeExistsError() - validateParameterCombinations(statusMap, workflowMap, workflowDependencies, fileDependencies, presetDependencies) + validateParameterCombinations(statusMap, workflowMap, workflowDependencies, fileDependencies) } // @@ -288,27 +310,162 @@ def genomeExistsError() { // Generate methods description for MultiQC // def toolCitationText() { - // TODO nf-core: Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() + "MultiQC (Ewels et al. 2016)", + "SAMtools (Danecek et al. 2021)", + ] + if (!params.skip_raw_read_qc) { + citation_text = citation_text + [ + "FastQC (Andrews 2010)", + "fcqrs", + ] + } + if (!params.skip_mapping_wf) { + if (params.parallel_alignments > 1) { + citation_text = citation_text + [ + "splitubam", + ] + } + citation_text = citation_text + [ + "SAMtools (Danecek et al. 2021)", + "Minimap2 (Li 2018)", + "Somalier (Pedersen et al. 2020)", + "Sniffles2 (Smolka et al. 2024)", + ] + if (!params.skip_aligned_read_qc) { + citation_text = citation_text + [ + "cramino (De Coster & Rademakers 2023)", + "mosdepth (Pedersen & Quinlan 2018)", + ] + } + if (!params.skip_call_paralogs) { + citation_text = citation_text + [ + "paraphase", + ] + } + if (!params.skip_assembly_wf) { + if (params.hifiasm_mode == 'trio-binning') { + citation_text = citation_text + [ + "yak", + ] + } + citation_text = citation_text + [ + "Hifiasm (Cheng et al. 2021)", + "Gfastats (Formenti et al. 
2022)", + "dipcall (Li et al. 2018)", + "SAMtools (Danecek et al. 2021)", + "Minimap2 (Li 2018)", + ] + } + if (!params.skip_short_variant_calling) { + citation_text = citation_text + [ + "BEDTools (Quinlan & Hall 2010)", + "BCFtools (Danecek et al. 2021)", + "DeepVariant (Poplin et al. 2018)", + "GLnexus (Yun et al. 2021)", + ] + } + if (!params.skip_snv_annotation) { + citation_text = citation_text + [ + "CADD (Rentzsch et al. 2019, Rentzsch et al. 2021)", + "BCFtools (Danecek et al. 2021)", + "VEP (McLaren et al. 2016)", + "Tabix (Li 2011)", + "Echtvar (Pedersen & de Ridder 2023)", + ] + if (!params.skip_rank_variants) { + citation_text = citation_text + [ + "Genmod (Magnusson et al. 2018)", + "Tabix (Li 2011)", + ] + } + } + if (!params.skip_cnv_calling) { + citation_text = citation_text + [ + "HiFiCNV", + ] + } + if (!params.skip_phasing_wf) { + citation_text = citation_text + [ + "SAMtools (Danecek et al. 2021)", + "cramino (De Coster & Rademakers 2023)", + ] + if(params.phaser == 'whatshap') { + citation_text = citation_text + [ + "WhatsHap (Martin et al. 2016)", + ] + } + if(params.phaser == 'hiphase_sv') { + citation_text = citation_text + [ + "HiPhase (Holt et al. 2024)", + ] + } + if(params.phaser == 'hiphase_snv') { + citation_text = citation_text + [ + "HiPhase (Holt et al. 2024)", + ] + } + if (!params.skip_methylation_wf) { + citation_text = citation_text + [ + "modkit", + "Tabix (Li 2011)", + ] + } + if (!params.skip_repeat_calling) { + citation_text = citation_text + [ + "TRGT (Dolzhenko et al. 2024)", + ] + if (!params.skip_repeat_annotation) { + citation_text = citation_text + [ + "Stranger (Nilsson & Magnusson 2021)", + ] + } + } + } + } - return citation_text + def return_text = "Tools used in the workflow included: " + citation_text.unique(false) { a, b -> a <=> b }.join(', ') - "" + "." + return return_text } def toolBibliographyText() { - // TODO nf-core: Optionally add bibliographic entries to this list. 
- // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI
</li>" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
</li>", - "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
</li>" - ].join(' ').trim() + + reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
</li>", + "
<li>Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031.
</li>", + "
<li>Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
</li>", + "
<li>Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311.
</li>", + "
<li>Danecek P, Bonfield JK, Liddle J, et al. Twelve years of SAMtools and BCFtools. GigaScience. 2021;10(2):giab008. doi:10.1093/gigascience/giab008
</li>", + "
<li>Quinlan AR and Hall IM, 2010. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 26, 6, pp. 841–842.
</li>", + "
<li>Wouter De Coster, Rosa Rademakers, NanoPack2: population-scale evaluation of long-read sequencing data, Bioinformatics, Volume 39, Issue 5, May 2023, btad311, https://doi.org/10.1093/bioinformatics/btad311
</li>", + "
<li>Rentzsch P, Schubach M, Shendure J, Kircher M. CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Med. 2021;13(1):31. doi:10.1186/s13073-021-00835-9
</li>", + "
<li>Rentzsch P, Witten D, Cooper GM, Shendure J, Kircher M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research. 2019;47(D1):D886-D894. doi:10.1093/nar/gky1016
</li>", + "
<li>Poplin R, Chang PC, Alexander D, et al. A universal SNP and small-indel variant caller using deep neural networks. Nat Biotechnol. 2018;36(10):983-987. doi:10.1038/nbt.4235
</li>", + "
<li>Li H, Bloom JM, Farjoun Y, Fleharty M, Gauthier L, Neale B, MacArthur D (2018) A synthetic-diploid benchmark for accurate variant-calling evaluation. Nat Methods, 15:595-597. [PMID:30013044]
</li>", + "
<li>Brent S Pedersen, Jeroen de Ridder, Echtvar: compressed variant representation for rapid annotation and filtering of SNPs and indels, Nucleic Acids Research, Volume 51, Issue 1, 11 January 2023, Page e3, https://doi.org/10.1093/nar/gkac931
</li>", + "
<li>McLaren W, Gil L, Hunt SE, et al. The Ensembl Variant Effect Predictor. Genome Biol. 2016;17(1):122. doi:10.1186/s13059-016-0974-4
</li>", + "
<li>Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].
</li>", + "
<li>Magnusson M, Hughes T, Glabilloy, Bitdeli Chef. genmod: Version 3.7.3. Published online November 15, 2018. doi:10.5281/ZENODO.3841142
</li>", + "
<li>Giulio Formenti, Linelle Abueg, Angelo Brajuka, Nadolina Brajuka, Cristóbal Gallardo-Alba, Alice Giani, Olivier Fedrigo, Erich D Jarvis, Gfastats: conversion, evaluation and manipulation of genome sequences using assembly graphs, Bioinformatics, Volume 38, Issue 17, September 2022, Pages 4214–4216, https://doi.org/10.1093/bioinformatics/btac460
</li>", + "
<li>Yun T, Li H, Chang PC, Lin MF, Carroll A, McLean CY. Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Robinson P, ed. Bioinformatics. 2021;36(24):5582-5589. doi:10.1093/bioinformatics/btaa1081
</li>", + "
<li>Cheng, H., Concepcion, G.T., Feng, X. et al. Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm. Nat Methods 18, 170–175 (2021). https://doi.org/10.1038/s41592-020-01056-5
</li>", + "
<li>James M Holt, Christopher T Saunders, William J Rowell, Zev Kronenberg, Aaron M Wenger, Michael Eberle, HiPhase: jointly phasing small, structural, and tandem repeat variants from HiFi sequencing, Bioinformatics, Volume 40, Issue 2, February 2024, btae042, https://doi.org/10.1093/bioinformatics/btae042
</li>", + "
<li>Heng Li, Minimap2: pairwise alignment for nucleotide sequences, Bioinformatics, Volume 34, Issue 18, September 2018, Pages 3094–3100, https://doi.org/10.1093/bioinformatics/bty191
</li>", + "
<li>Pedersen BS, Quinlan AR. Mosdepth: quick coverage calculation for genomes and exomes. Hancock J, ed. Bioinformatics. 2018;34(5):867-868. doi:10.1093/bioinformatics/btx699
</li>", + "
<li>Genome-wide profiling of highly similar paralogous genes using HiFi sequencing. Xiao Chen, Daniel Baker, Egor Dolzhenko, Joseph M Devaney, Jessica Noya, April S Berlyoung, Rhonda Brandon, Kathleen S Hruska, Lucas Lochovsky, Paul Kruszka, Scott Newman, Emily Farrow, Isabelle Thiffault, Tomi Pastinen, Dalia Kasperaviciute, Christian Gilissen, Lisenka Vissers, Alexander Hoischen, Seth Berger, Eric Vilain, Emmanuèle Délot, UCI Genomics Research to Elucidate the Genetics of Rare Diseases (UCI GREGoR) Consortium, Michael A Eberle. bioRxiv 2024.04.19.590294; doi: https://doi.org/10.1101/2024.04.19.590294
</li>", + "
<li>Smolka, M., Paulin, L.F., Grochowski, C.M. et al. Detection of mosaic and population-level structural variants with Sniffles2. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-023-02024-y
</li>", + "
<li>Pedersen, B.S., Bhetariya, P.J., Brown, J. et al. Somalier: rapid relatedness estimation for cancer and germline studies using efficient genome sketches. Genome Med 12, 62 (2020). https://doi.org/10.1186/s13073-020-00761-2
</li>", + "
<li>Nilsson D, Magnusson M. moonso/stranger v0.7.1. Published online February 18, 2021. doi:10.5281/ZENODO.4548873
</li>", + "
<li>Li H. Tabix: fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics. 2011;27(5):718-719. doi:10.1093/bioinformatics/btq671
</li>", + "
<li>Dolzhenko, E., English, A., Dashnow, H. et al. Characterization and visualization of tandem repeats at genome scale. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-023-02057-3
</li>", + "
<li>Marcel Martin, Murray Patterson, Shilpa Garg, Sarah O Fischer, Nadia Pisanti, Gunnar W Klau, Alexander Schöenhuth, Tobias Marschall. bioRxiv 085050; doi: https://doi.org/10.1101/085050
</li>", + "
<li>Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.
</li>", + "
<li>Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506.
</li>", + "
<li>da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671.
</li>", + "
<li>Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241.
</li>", + "
<li>Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
</li>", + ].join(' ').trim() return reference_text } @@ -335,10 +492,8 @@ def methodsDescriptionText(mqc_methods_yaml) { meta["tool_citations"] = "" meta["tool_bibliography"] = "" - // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - // meta["tool_bibliography"] = toolBibliographyText() - + meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText() def methods_text = mqc_methods_yaml.text @@ -349,12 +504,12 @@ def methodsDescriptionText(mqc_methods_yaml) { } // -// Validate preset and workflow skip combinations +// Validate workflow skip combinations // -def validateParameterCombinations(statusMap, workflowMap, workflowDependencies, fileDependencies, presetIncompatibilities) { +def validateParameterCombinations(statusMap, workflowMap, workflowDependencies, fileDependencies) { // Array to store errors def errors = [] - // For each of the "workflow", "files", "preset" + // For each of the "workflow", "files" statusMap.each { paramsType, allParams -> // Go through all params and their status statusMap[paramsType].each { param, paramStatus -> @@ -365,9 +520,6 @@ def validateParameterCombinations(statusMap, workflowMap, workflowDependencies, case "workflow": checkWorkflowDependencies(param, workflowDependencies, statusMap, workflowMap, errors) break - case "preset": - checkPresetDependencies(param, presetIncompatibilities, statusMap, workflowMap, errors) - break default: break } @@ -383,35 +535,6 @@ def validateParameterCombinations(statusMap, workflowMap, workflowDependencies, } } -// -// Lookup all workflows that needs to be active for a certain preset -// -def checkPresetDependencies(String preset, Map combinationsMap, Map statusMap, Map workflowMap, List errors) { - - // If
preset is not active, then give no error - presetIsActive = statusMap["preset"][preset] - if(!presetIsActive) { - return - } - - // Get all required workflows for a preset - def requiredWorkflows = combinationsMap[preset] as Set - // If no direct dependencies are found, return an empty list - if (!requiredWorkflows) { - return [] - } - // Collect the required --skips that are not active for the current preset - def dependencyString = findRequiredSkips("preset", requiredWorkflows, statusMap, workflowMap) - .collect { [ '--', it ].join('') } - .join(" ") - // If all reqired sets are set, give no error - if (!dependencyString) { - return - } - errors << "--preset $preset is active, the pipeline has to be run with: $dependencyString" - return errors -} - // // Lookup all workflows that needs to be active for another workflow // @@ -480,10 +603,6 @@ def findRequiredSkips(paramType, Set requiredWorkflows, Map statusMap, M if(workflowIsSkipped) { requiredSkips << skip } - } else if(paramType == "preset") { - if(!workflowIsSkipped) { - requiredSkips << skip - } } } return requiredSkips diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 00000000..7584d5ad --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,345 @@ +nextflow_pipeline { + + name "Test pipeline GENOMICMEDICINESWEDEN_NALLO" + script "../main.nf" + profile "test" + tag "PIPELINE" + + test("test profile") { + tag "samplesheet" + + when { + params { + // Base directory for genomic-medicine-sweden/nallo test data + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/' + // Test files + fasta = params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz' + input = 'https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet.csv' + bed = params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed' + hificnv_xy = params.pipelines_testdata_base_path + 
'nallo/reference/expected_cn.hg38.XY.bed' + hificnv_xx = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XX.bed' + hificnv_exclude = params.pipelines_testdata_base_path + 'nallo/reference/empty.bed' + par_regions = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed' + trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed' + variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json' + vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz' + vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv' + snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv' + somalier_sites = params.pipelines_testdata_base_path + 'nallo/reference/somalier_sites.vcf.gz' + reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv' + score_config_snv = params.pipelines_testdata_base_path + 'nallo/reference/rank_model_snv.ini' + variant_consequences_snv = params.pipelines_testdata_base_path + 'nallo/reference/variant_consequences_v2.txt' + + // Parameters + parallel_snv = 2 + preset = "revio" + outdir = "$outputDir" + } + } + + then { + assertAll ( + { assert workflow.success }, + // Assert with snapshot multisample + { assert snapshot( + file("$outputDir/pedigree/test.ped"), + file("$outputDir/qc_aligned_reads/somalier/relate/test/test.pairs.tsv"), + file("$outputDir/qc_aligned_reads/somalier/relate/test/test.samples.tsv"), + file("$outputDir/qc_aligned_reads/somalier/relate/test/test.html"), + file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + file("$outputDir/multiqc/multiqc_data/multiqc_somalier.txt"), + // Assert with snapshot HG002_Revio + bam("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam", stringency: 'silent').getReadsMD5(), + 
file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap1.p_ctg.assembly_summary"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap2.p_ctg.assembly_summary"), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.dip.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bed"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.copynum.bedgraph"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.depth.bw"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.maf.bw"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.mosdepth.global.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.mosdepth.region.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.mosdepth.summary.txt"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap1.p_ctg.fasta.gz"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap2.p_ctg.fasta.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.dip.vcf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.var.gz"), + 
file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.pair.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.vcf.gz.tbi"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio/HG002_Revio_modkit_pileup.bed.gz"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio/HG002_Revio_modkit_pileup.bed.gz.tbi"), + bam("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio/HG002_Revio_cramino_aligned_phased.arrow"), + file("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio/HG002_Revio_cramino_aligned.arrow"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.regions.bed.gz"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.regions.bed.gz.csi"), + file("$outputDir/qc_raw_reads/fqcrs/HG002_Revio/HG002_PacBio_Revio.fastq.gz.tsv.zst"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_sorted.vcf.gz"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_sorted.vcf.gz.tbi"), + bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_spanning_sorted.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/snvs/stats/single_sample/HG002_Revio.vcf.gz.bcftools_stats.txt").readLines()[0..2], + ).match() }, + // Assert exists multisample + { assert new File("$outputDir/databases/echtvar/encode/test/test.zip").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_data.json").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc.log").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt").exists() }, + { assert new 
File("$outputDir/multiqc/multiqc_data/multiqc_sources.txt").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz").exists() }, + { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz.tbi").exists() }, + // Assert exists HG002_Revio + { assert new File("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.sam.gz").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.sam.gz").exists() }, + { assert new File("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.log").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_1.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_1.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_2.bed.gz").exists() }, + { assert new 
File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_2.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_ungrouped.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_ungrouped.bed.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.bam.bai").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio_paraphase_vcfs/HG002_Revio_hba.vcf.gz").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio_paraphase_vcfs/HG002_Revio_hba.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.json").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio/HG002_Revio_cramino_aligned_phased.txt").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio/HG002_Revio_cramino_aligned.txt").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.stats.tsv").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio/HG002_Revio_fastqc.html").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio/HG002_Revio_fastqc.zip").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio/HG002_Revio_repeat_expansion_stranger.vcf.gz").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio/HG002_Revio_repeat_expansion_stranger.vcf.gz.tbi").exists() }, + 
{ assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_spanning_sorted.bam.bai").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio/HG002_Revio_snv_annotated_ranked.vcf.gz").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio/HG002_Revio_snv_annotated_ranked.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio/HG002_Revio_sniffles.snf").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio/HG002_Revio_sniffles.vcf.gz").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio/HG002_Revio_sniffles.vcf.gz.tbi").exists() } + ) + } + } + + test("test profile - multisample") { + tag "samplesheet_multisample_bam" + + when { + params { + // Base directory for genomic-medicine-sweden/nallo test data + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/' + // Test files + fasta = params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz' + input = 'https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet_multisample_bam.csv' + bed = params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed' + hificnv_xy = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XY.bed' + hificnv_xx = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XX.bed' + hificnv_exclude = params.pipelines_testdata_base_path + 'nallo/reference/empty.bed' + par_regions = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed' + trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed' + variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json' + vep_cache = params.pipelines_testdata_base_path + 
'nallo/reference/vep_cache_test_data.tar.gz' + vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv' + snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv' + somalier_sites = params.pipelines_testdata_base_path + 'nallo/reference/somalier_sites.vcf.gz' + reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv' + score_config_snv = params.pipelines_testdata_base_path + 'nallo/reference/rank_model_snv.ini' + variant_consequences_snv = params.pipelines_testdata_base_path + 'nallo/reference/variant_consequences_v2.txt' + + // Parameters + parallel_snv = 2 + preset = "revio" + outdir = "$outputDir" + } + } + + then { + assertAll ( + { assert workflow.success }, + // Assert with snapshot multisample + { assert snapshot( + file("$outputDir/pedigree/test.ped"), + file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + // Assert with snapshot HG002_Revio_A + bam("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap1.p_ctg.assembly_summary"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap2.p_ctg.assembly_summary"), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.dip.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bed"), + 
file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.copynum.bedgraph"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.depth.bw"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.maf.bw"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.mosdepth.global.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.mosdepth.region.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.mosdepth.summary.txt"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap1.p_ctg.fasta.gz"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap2.p_ctg.fasta.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.dip.vcf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.pair.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.vcf.gz.tbi"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_A/HG002_Revio_A_modkit_pileup.bed.gz"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_A/HG002_Revio_A_modkit_pileup.bed.gz.tbi"), + bam("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_A/HG002_Revio_A_cramino_aligned_phased.arrow"), + 
file("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_A/HG002_Revio_A_cramino_aligned.arrow"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.regions.bed.gz"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.regions.bed.gz.csi"), + file("$outputDir/qc_raw_reads/fqcrs/HG002_Revio_A/HG002_PacBio_Revio.bam_other.fastq.gz.tsv.zst"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_sorted.vcf.gz"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_sorted.vcf.gz.tbi"), + bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_spanning_sorted.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/snvs/stats/single_sample/HG002_Revio_A.vcf.gz.bcftools_stats.txt").readLines()[0..2], + // Assert with snapshot HG002_Revio_B + bam("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap1.p_ctg.assembly_summary"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap2.p_ctg.assembly_summary"), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.dip.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bed"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.copynum.bedgraph"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.depth.bw"), + 
file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.maf.bw"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.mosdepth.global.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.mosdepth.region.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.mosdepth.summary.txt"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap1.p_ctg.fasta.gz"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap2.p_ctg.fasta.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.dip.vcf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.pair.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.vcf.gz.tbi"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_B/HG002_Revio_B_modkit_pileup.bed.gz"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_B/HG002_Revio_B_modkit_pileup.bed.gz.tbi"), + bam("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_B/HG002_Revio_B_cramino_aligned_phased.arrow"), + file("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_B/HG002_Revio_B_cramino_aligned.arrow"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.regions.bed.gz"), + 
file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.regions.bed.gz.csi"), + file("$outputDir/qc_raw_reads/fqcrs/HG002_Revio_B/HG002_Revio_B.merged.fastq.gz.tsv.zst"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_sorted.vcf.gz"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_sorted.vcf.gz.tbi"), + bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_spanning_sorted.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/snvs/stats/single_sample/HG002_Revio_B.vcf.gz.bcftools_stats.txt").readLines()[0..2], + ).match() }, + // Assert exists multisample - note the trgt multisample that doesn't exist in singlesample + { assert new File("$outputDir/databases/echtvar/encode/test/test.zip").exists() }, + { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.pairs.tsv").exists() }, + { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.samples.tsv").exists() }, + { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.html").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_somalier.txt").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_data.json").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc.log").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_sources.txt").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt").exists() }, + { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz").exists() }, + { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz.tbi").exists() }, + { assert new 
File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz.tbi").exists() }, + // Assert exists HG002_Revio_A + { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.sam.gz").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.sam.gz").exists() }, + { assert new File("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.log").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_1.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_1.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_2.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_2.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_ungrouped.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_ungrouped.bed.gz.tbi").exists() }, + { assert new 
File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.bam.bai").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A_paraphase_vcfs/HG002_Revio_A_hba.vcf.gz").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A_paraphase_vcfs/HG002_Revio_A_hba.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.json").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_A/HG002_Revio_A_cramino_aligned_phased.txt").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_A/HG002_Revio_A_cramino_aligned.txt").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.stats.tsv").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.html").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.zip").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_A/HG002_Revio_A_repeat_expansion_stranger.vcf.gz").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_A/HG002_Revio_A_repeat_expansion_stranger.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_spanning_sorted.bam.bai").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio_A/HG002_Revio_A_snv_annotated_ranked.vcf.gz").exists() }, + { 
assert new File("$outputDir/snvs/single_sample/HG002_Revio_A/HG002_Revio_A_snv_annotated_ranked.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_A/HG002_Revio_A_sniffles.snf").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_A/HG002_Revio_A_sniffles.vcf.gz").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_A/HG002_Revio_A_sniffles.vcf.gz.tbi").exists() }, + // Assert exists HG002_Revio_B + { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.sam.gz").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.sam.gz").exists() }, + { assert new File("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.log").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_1.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_1.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_2.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_2.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_ungrouped.bed.gz").exists() }, + { assert new 
File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_ungrouped.bed.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.bam.bai").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B_paraphase_vcfs/HG002_Revio_B_hba.vcf.gz").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B_paraphase_vcfs/HG002_Revio_B_hba.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.json").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_B/HG002_Revio_B_cramino_aligned_phased.txt").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_B/HG002_Revio_B_cramino_aligned.txt").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam.bai").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.stats.tsv").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.html").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.zip").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_B/HG002_Revio_B_repeat_expansion_stranger.vcf.gz").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_B/HG002_Revio_B_repeat_expansion_stranger.vcf.gz.tbi").exists() }, + { assert new 
File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_spanning_sorted.bam.bai").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio_B/HG002_Revio_B_snv_annotated_ranked.vcf.gz").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio_B/HG002_Revio_B_snv_annotated_ranked.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_B/HG002_Revio_B_sniffles.snf").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_B/HG002_Revio_B_sniffles.vcf.gz").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_B/HG002_Revio_B_sniffles.vcf.gz.tbi").exists() } + ) + } + } +} diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap new file mode 100644 index 00000000..a3412aba --- /dev/null +++ b/tests/main.nf.test.snap @@ -0,0 +1,150 @@ +{ + "test profile": { + "content": [ + "test.ped:md5,bd5cec27ba7337a85cf98e787131e2b5", + "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595", + "test.samples.tsv:md5,1685dc6cb8c6b9806ca636662980d686", + "test.html:md5,d05e0eceb70ada3a0c25f99a16ad1889", + "multiqc_citations.txt:md5,a27affce20d456d20ed387097a4f0350", + "multiqc_fastqc.txt:md5,c60b523b5f11c07c9149043bf68f92ad", + "multiqc_somalier.txt:md5,20b4c5b2d5b94b77fb800548e07a874e", + "74b4822241bd8d1bc42f494f1f3e326c", + "HG002_Revio.asm.bp.hap1.p_ctg.assembly_summary:md5,ccaad2690abccadc4ec3b2c5d8fa4b05", + "HG002_Revio.asm.bp.hap2.p_ctg.assembly_summary:md5,c5bbeabb571453186a39cf6e487dbcc5", + "67fc08c5db63d417992aa4842a567c2d", + "28f964b0683d285fabc5407af0f28580", + "HG002_Revio.dip.bed:md5,5c0ad25a4bb82b8ce06f526664ffbd1c", + "HG002_Revio.hap1.bed:md5,28ac0570c41a83c231f2c853918d24c5", + "HG002_Revio.hap2.bed:md5,8b9a998402277ef043718f95a5410fe1", + "HG002_Revio.HG002_Revio.copynum.bedgraph:md5,517bc59c1b235490c79aa8319437b033", + 
"HG002_Revio.HG002_Revio.depth.bw:md5,20757c985f4713b8568dee05765db701", + "HG002_Revio.HG002_Revio.maf.bw:md5,4699d7a664277863f06eb48c3cba3c41", + "HG002_Revio.mosdepth.global.dist.txt:md5,6186315d4d65eda85553af82a98829d1", + "HG002_Revio.mosdepth.region.dist.txt:md5,c6c7ee8f056b8b2b92c97ec472b1db16", + "HG002_Revio.mosdepth.summary.txt:md5,35c51f1ad9d2856d1e6446205f19a8e3", + "HG002_Revio.asm.bp.hap1.p_ctg.fasta.gz:md5,6c040d554c3310e1555c928a68fca9f2", + "HG002_Revio.asm.bp.hap2.p_ctg.fasta.gz:md5,d250ea1f1a1186d1d324c843bc8a3e7a", + "HG002_Revio.dip.vcf.gz:md5,ce28d131a954db1325246fce498eb659", + "HG002_Revio.hap1.paf.gz:md5,493bbf9db4bc541a348de656f29e58f1", + "HG002_Revio.hap1.var.gz:md5,55e7be8fb8fe051725890f1cfaae3692", + "HG002_Revio.hap2.paf.gz:md5,63066bc6d6abcdff9b6916d549077a41", + "HG002_Revio.hap2.var.gz:md5,fb0b9d9708cd907831f9cd7019f1d03d", + "HG002_Revio.pair.vcf.gz:md5,977d60dfdf18e27c06574ec8297e49e4", + "HG002_Revio.HG002_Revio.vcf.gz:md5,771f7d9ecbd161dedfbdd053ceff0d52", + "HG002_Revio.HG002_Revio.vcf.gz.tbi:md5,65d6930f79e626a5542091b7a419dc2b", + "HG002_Revio_modkit_pileup.bed.gz:md5,8850d8414ecefc2341452f23ce5e6870", + "HG002_Revio_modkit_pileup.bed.gz.tbi:md5,af7e0b1699351d6df3d93d223d5c3da9", + "fcc3f220bb09d3264f97e7f620779be0", + "HG002_Revio_cramino_aligned_phased.arrow:md5,72df2934ff8aa7e1bf8cf8a4881a0d2a", + "HG002_Revio_cramino_aligned.arrow:md5,72df2934ff8aa7e1bf8cf8a4881a0d2a", + "HG002_Revio.regions.bed.gz:md5,e051f64c7a780d67ea6727a327dd4281", + "HG002_Revio.regions.bed.gz.csi:md5,026eef1c69fb4aa3a1687463fe2088ab", + "HG002_PacBio_Revio.fastq.gz.tsv.zst:md5,4b073293b3e771d19b4cfdb07909571b", + "HG002_Revio_sorted.vcf.gz:md5,fbb5699b8f74fc105fb154e8fac7bfea", + "HG002_Revio_sorted.vcf.gz.tbi:md5,0466518ee265ba63160ed27cee0dec88", + "65999ab8f2bc7841de8172468bf23ab6", + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats 
HG002_Revio.vcf.gz", + "#" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-29T10:42:09.011660883" + }, + "test profile - multisample": { + "content": [ + "test.ped:md5,db74c6787a92a70ffaab766fa4d7a873", + "multiqc_citations.txt:md5,a27affce20d456d20ed387097a4f0350", + "multiqc_fastqc.txt:md5,b74145ef9fbf8addcc2997ca26b3aa45", + "74b4822241bd8d1bc42f494f1f3e326c", + "HG002_Revio_A.asm.bp.hap1.p_ctg.assembly_summary:md5,ccaad2690abccadc4ec3b2c5d8fa4b05", + "HG002_Revio_A.asm.bp.hap2.p_ctg.assembly_summary:md5,c5bbeabb571453186a39cf6e487dbcc5", + "67fc08c5db63d417992aa4842a567c2d", + "28f964b0683d285fabc5407af0f28580", + "HG002_Revio_A.dip.bed:md5,5c0ad25a4bb82b8ce06f526664ffbd1c", + "HG002_Revio_A.hap1.bed:md5,28ac0570c41a83c231f2c853918d24c5", + "HG002_Revio_A.hap2.bed:md5,8b9a998402277ef043718f95a5410fe1", + "HG002_Revio_A.HG002_Revio_A.copynum.bedgraph:md5,517bc59c1b235490c79aa8319437b033", + "HG002_Revio_A.HG002_Revio_A.depth.bw:md5,20757c985f4713b8568dee05765db701", + "HG002_Revio_A.HG002_Revio_A.maf.bw:md5,4699d7a664277863f06eb48c3cba3c41", + "HG002_Revio_A.mosdepth.global.dist.txt:md5,6186315d4d65eda85553af82a98829d1", + "HG002_Revio_A.mosdepth.region.dist.txt:md5,c6c7ee8f056b8b2b92c97ec472b1db16", + "HG002_Revio_A.mosdepth.summary.txt:md5,35c51f1ad9d2856d1e6446205f19a8e3", + "HG002_Revio_A.asm.bp.hap1.p_ctg.fasta.gz:md5,6c040d554c3310e1555c928a68fca9f2", + "HG002_Revio_A.asm.bp.hap2.p_ctg.fasta.gz:md5,d250ea1f1a1186d1d324c843bc8a3e7a", + "HG002_Revio_A.dip.vcf.gz:md5,d97b3e520a3bf6f85eb6b5dd66961e71", + "HG002_Revio_A.hap1.paf.gz:md5,493bbf9db4bc541a348de656f29e58f1", + "HG002_Revio_A.hap1.var.gz:md5,55e7be8fb8fe051725890f1cfaae3692", + "HG002_Revio_A.hap2.paf.gz:md5,63066bc6d6abcdff9b6916d549077a41", + "HG002_Revio_A.hap2.var.gz:md5,fb0b9d9708cd907831f9cd7019f1d03d", + "HG002_Revio_A.pair.vcf.gz:md5,8fd0eb40954ed6d829d0632d0cf54414", + "HG002_Revio_A.HG002_Revio_A.vcf.gz:md5,e575fbadcd826243561192310f234c82", + 
"HG002_Revio_A.HG002_Revio_A.vcf.gz.tbi:md5,e27fe63e51a2cfaec020ad0a1aec4a7f", + "HG002_Revio_A_modkit_pileup.bed.gz:md5,8850d8414ecefc2341452f23ce5e6870", + "HG002_Revio_A_modkit_pileup.bed.gz.tbi:md5,af7e0b1699351d6df3d93d223d5c3da9", + "fcc3f220bb09d3264f97e7f620779be0", + "HG002_Revio_A_cramino_aligned_phased.arrow:md5,72df2934ff8aa7e1bf8cf8a4881a0d2a", + "HG002_Revio_A_cramino_aligned.arrow:md5,72df2934ff8aa7e1bf8cf8a4881a0d2a", + "HG002_Revio_A.regions.bed.gz:md5,e051f64c7a780d67ea6727a327dd4281", + "HG002_Revio_A.regions.bed.gz.csi:md5,026eef1c69fb4aa3a1687463fe2088ab", + "HG002_PacBio_Revio.bam_other.fastq.gz.tsv.zst:md5,4b073293b3e771d19b4cfdb07909571b", + "HG002_Revio_A_sorted.vcf.gz:md5,680938d6ebeafe73d8df0b21c0310276", + "HG002_Revio_A_sorted.vcf.gz.tbi:md5,a6554ab817e7c232a1554ea85fa00151", + "65999ab8f2bc7841de8172468bf23ab6", + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats HG002_Revio_A.vcf.gz", + "#" + ], + "fe7bb70701d1100b2874c10a512a2144", + "HG002_Revio_B.asm.bp.hap1.p_ctg.assembly_summary:md5,4941730ceacb4012e771208be7a6673a", + "HG002_Revio_B.asm.bp.hap2.p_ctg.assembly_summary:md5,be7dcb093d25922b72ef0f7bc1bf0706", + "dc5ae77e003989e86ccbfd14b4ae6c7", + "3608f178b942e804e5588e5bbd9e06ee", + "HG002_Revio_B.dip.bed:md5,865dbd07d7c6b7904176ee25cce91928", + "HG002_Revio_B.hap1.bed:md5,50609668f7b26a9d68d583fdaad9bedd", + "HG002_Revio_B.hap2.bed:md5,865338ef4e621ca1e8efcc0a8a6c64d0", + "HG002_Revio_B.HG002_Revio_B.copynum.bedgraph:md5,22b040fce6bdf6b2e323f1cc1658128d", + "HG002_Revio_B.HG002_Revio_B.depth.bw:md5,26bc36669ae0b066c2462e3c9307860f", + "HG002_Revio_B.HG002_Revio_B.maf.bw:md5,6c5691dbaf6e526f4b8fa0f7147bfc8b", + "HG002_Revio_B.mosdepth.global.dist.txt:md5,c3449580a34c495f537635c3bde4699d", + "HG002_Revio_B.mosdepth.region.dist.txt:md5,98a20c6295a956932c21534cbf5e889c", + 
"HG002_Revio_B.mosdepth.summary.txt:md5,d4fa8aeabc3b348255e7a2fa0135b1a4", + "HG002_Revio_B.asm.bp.hap1.p_ctg.fasta.gz:md5,bd4ae3d478b26ebaa6b64c6795e09e71", + "HG002_Revio_B.asm.bp.hap2.p_ctg.fasta.gz:md5,688d9e693b580a901632022b26fd9d5f", + "HG002_Revio_B.dip.vcf.gz:md5,544c42489591d02fa0dc143e6cf1c6b8", + "HG002_Revio_B.hap1.paf.gz:md5,467daced5b3897cf74c68251b8f99e3a", + "HG002_Revio_B.hap1.var.gz:md5,fb4efe049800a27b10d080ea5a563fb6", + "HG002_Revio_B.hap2.paf.gz:md5,fadac5cffae4e167ef1c41de929edf1b", + "HG002_Revio_B.hap2.var.gz:md5,6e4fcd586c9a30b46d23a8de1d0efe73", + "HG002_Revio_B.pair.vcf.gz:md5,987d192995729f82247dffdcea2dd60f", + "HG002_Revio_B.HG002_Revio_B.vcf.gz:md5,59773d5836f76b36dc1bf75e32186898", + "HG002_Revio_B.HG002_Revio_B.vcf.gz.tbi:md5,1408aeaca23a00eee979101eb45d887d", + "HG002_Revio_B_modkit_pileup.bed.gz:md5,e50e61d644ad3219b3088b1c0d701862", + "HG002_Revio_B_modkit_pileup.bed.gz.tbi:md5,ae29296657f0d2c7451d285ec49f8675", + "a8a1403ba192f971638c870a2be35700", + "HG002_Revio_B_cramino_aligned_phased.arrow:md5,bdd385c0b007ffc30c6cfdd98b15ed82", + "HG002_Revio_B_cramino_aligned.arrow:md5,bdd385c0b007ffc30c6cfdd98b15ed82", + "HG002_Revio_B.regions.bed.gz:md5,deaca22783bd058cdc8756efa25b5f53", + "HG002_Revio_B.regions.bed.gz.csi:md5,dd9a0d36d71da0d274d1c9ca6f8571ae", + "HG002_Revio_B.merged.fastq.gz.tsv.zst:md5,0641e175a07429b61710329a2eeef450", + "HG002_Revio_B_sorted.vcf.gz:md5,ce617741468f4bc7f504f8f488332098", + "HG002_Revio_B_sorted.vcf.gz.tbi:md5,e255a5ea92885967f0c126bddc8ea3b2", + "6b0cf3f492ce898398835d1102afd369", + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats HG002_Revio_B.vcf.gz", + "#" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-29T10:45:44.41793623" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 
00000000..3c53747a --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,77 @@ +params { + // Base directory for nf-core/modules test data + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + + // Base directory for genomic-medicine-sweden/nallo test data + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/' + + max_cpus = 4 + max_memory = '15.GB' + max_time = '6.h' + +} + +// Impose same minimum Nextflow version as the pipeline for testing +manifest { + nextflowVersion = '!>=23.04.0' +} + +// Disable all Nextflow reporting options +timeline { enabled = false } +report { enabled = false } +trace { enabled = false } +dag { enabled = false } + +process { + + withLabel: 'process_high' { + cpus = 4 + } + + withLabel: 'process_medium' { + cpus = 2 + } + + withLabel: 'process_low' { + cpus = 1 + } + + withLabel: 'process_single' { + cpus = 1 + } + + withName: '.*:NALLO:ASSEMBLY:HIFIASM' { + + ext.args = '-f0' + } + + withName: '.*:CALL_PARALOGS:PARAPHASE' { + + ext.args = '--gene hba' + } + + withName: '.*:NALLO:PHASING:WHATSHAP_PHASE' { + + ext.args = '--ignore-read-groups --indels --distrust-genotypes --include-homozygous' + } + + withName: '.*:NALLO:SNV_ANNOTATION:ENSEMBLVEP_VEP' { + ext.prefix = { "${meta.id}_vep" } + ext.args = { [ + "--dir_plugins .", + "--plugin LoFtool,LoFtool_scores.txt", + "--plugin pLI,pLI_values.txt", + "--plugin SpliceAI,snv=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz", + '--distance 5000', + '--buffer_size 20000', + '--format vcf --max_sv_size 248387328', + '--appris --biotype --cache --canonical --ccds --compress_output bgzip', + '--domains --exclude_predicted --force_overwrite', + '--hgvs --humdiv --no_progress --numbers', + '--polyphen p --protein --offline --regulatory --sift p --symbol --tsl', + '--uniprot --vcf', + '--no_stats' + ].join(' ') } + } + +} diff --git a/workflows/nallo.nf 
b/workflows/nallo.nf index 9547c48f..ebf26251 100644 --- a/workflows/nallo.nf +++ b/workflows/nallo.nf @@ -6,20 +6,24 @@ include { fromSamplesheet } from 'plugin/nf-validation' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { BAM_TO_FASTQ } from '../subworkflows/local/bam_to_fastq' -include { BAM_INFER_SEX } from '../subworkflows/local/bam_infer_sex' -include { ASSEMBLY } from '../subworkflows/local/genome_assembly' -include { ASSEMBLY_VARIANT_CALLING } from '../subworkflows/local/assembly_variant_calling' -include { CALL_PARALOGS } from '../subworkflows/local/call_paralogs' -include { QC_ALIGNED_READS } from '../subworkflows/local/qc_aligned_reads' -include { STRUCTURAL_VARIANT_CALLING } from '../subworkflows/local/structural_variant_calling' -include { SHORT_VARIANT_CALLING } from '../subworkflows/local/short_variant_calling' -include { CNV } from '../subworkflows/local/cnv' -include { REPEAT_ANALYSIS } from '../subworkflows/local/repeat_analysis' -include { METHYLATION } from '../subworkflows/local/methylation' -include { PHASING } from '../subworkflows/local/phasing' -include { SNV_ANNOTATION } from '../subworkflows/local/snv_annotation' +include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli' +include { ANNOTATE_REPEAT_EXPANSIONS } from '../subworkflows/local/annotate_repeat_expansions' +include { ASSEMBLY } from '../subworkflows/local/genome_assembly' +include { ASSEMBLY_VARIANT_CALLING } from '../subworkflows/local/assembly_variant_calling' +include { CONVERT_INPUT_FILES } from '../subworkflows/local/convert_input_files' +include { BAM_INFER_SEX } from '../subworkflows/local/bam_infer_sex' +include { CALL_PARALOGS } from '../subworkflows/local/call_paralogs' +include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions' +include { CNV } from 
'../subworkflows/local/cnv' +include { METHYLATION } from '../subworkflows/local/methylation' +include { PHASING } from '../subworkflows/local/phasing' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { QC_ALIGNED_READS } from '../subworkflows/local/qc_aligned_reads' +include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants' +include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome' +include { SHORT_VARIANT_CALLING } from '../subworkflows/local/short_variant_calling' +include { SNV_ANNOTATION } from '../subworkflows/local/snv_annotation' +include { STRUCTURAL_VARIANT_CALLING } from '../subworkflows/local/structural_variant_calling' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -28,22 +32,25 @@ include { SNV_ANNOTATION } from '../subworkflows/local/snv_annotatio */ // local -include { FQCRS } from '../modules/local/fqcrs' -include { CONVERT_ONT_READ_NAMES } from '../modules/local/convert_ont_read_names' -include { BUILD_INTERVALS } from '../modules/local/build_intervals/main' -include { SPLIT_BED_CHUNKS } from '../modules/local/split_bed_chunks/main' -include { SAMTOOLS_MERGE } from '../modules/nf-core/samtools/merge/main' +include { CREATE_PEDIGREE_FILE as SAMPLESHEET_PED } from '../modules/local/create_pedigree_file' +include { CREATE_PEDIGREE_FILE as SOMALIER_PED } from '../modules/local/create_pedigree_file' +include { ECHTVAR_ENCODE } from '../modules/local/echtvar/encode/main' +include { FQCRS } from '../modules/local/fqcrs' +include { SAMTOOLS_MERGE } from '../modules/nf-core/samtools/merge/main' // nf-core -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/' -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { FASTP } from '../modules/nf-core/fastp/main' -include { MINIMAP2_ALIGN } from '../modules/nf-core/minimap2/align/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { 
paramsSummaryMap } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_nallo_pipeline' +include { BCFTOOLS_CONCAT } from '../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_PLUGINSPLIT } from '../modules/nf-core/bcftools/pluginsplit/main' +include { BCFTOOLS_STATS } from '../modules/nf-core/bcftools/stats/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MINIMAP2_ALIGN } from '../modules/nf-core/minimap2/align/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { SPLITUBAM } from '../modules/nf-core/splitubam/main' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_nallo_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -57,124 +64,146 @@ workflow NALLO { ch_input main: + ch_vep_cache = Channel.value([]) ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - // Optional input files - ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { it -> [it.simpleName, it] }.collect() - : '' - ch_extra_snfs = params.extra_snfs ? Channel.fromSamplesheet('extra_snfs') - : Channel.empty() - ch_extra_gvcfs = params.extra_gvcfs ? Channel.fromSamplesheet('extra_gvcfs') - : Channel.empty() - ch_tandem_repeats = params.tandem_repeats ? Channel.fromPath(params.tandem_repeats).map{ [ it.getSimpleName(), it]}.collect() - : Channel.value([[],[]]) - ch_bed = params.bed ? 
Channel.fromPath(params.bed).map{ [ it.getSimpleName(), it]}.collect() - : Channel.empty() - ch_input_bed = params.bed ? Channel.fromPath(params.bed).map{ [ it.getSimpleName(), it]}.collect() - : Channel.value([[],[]]) - - // Conditional input files that has to be set depending on which workflow is run - ch_par = params.dipcall_par ? Channel.fromPath(params.dipcall_par).collect() - : '' - ch_trgt_bed = params.trgt_repeats ? Channel.fromPath(params.trgt_repeats).collect() - : '' - ch_databases = params.snp_db ? Channel.fromSamplesheet('snp_db', immutable_meta: false).map{it[1]}.collect() - : '' - ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() - : Channel.value([[],[]]) - ch_expected_xy_bed = params.hificnv_xy ? Channel.fromPath(params.hificnv_xy).collect() - : '' - ch_expected_xx_bed = params.hificnv_xx ? Channel.fromPath(params.hificnv_xx).collect() - : '' - ch_exclude_bed = params.hificnv_exclude ? Channel.fromPath(params.hificnv_exclude).collect() - : '' - ch_somalier_sites = params.somalier_sites ? Channel.fromPath(params.somalier_sites).map { [it.getSimpleName(), it ] }.collect() - : '' + // Optional input files that has to be set depending on which workflow is run + ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() + ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect() + : '' + ch_cadd_prescored = params.cadd_prescored ? Channel.fromPath(params.cadd_prescored).collect() + : '' + ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { it -> [ it.simpleName, it ] }.collect() + : '' + ch_tandem_repeats = params.tandem_repeats ? Channel.fromPath(params.tandem_repeats).map{ [ it.simpleName, it ] }.collect() + : Channel.value([[],[]]) + ch_input_bed = params.bed ? 
Channel.fromPath(params.bed).map{ [ [ id:it.simpleName ] , it ] }.collect() + : Channel.value([[],[]]) + ch_par = params.par_regions ? Channel.fromPath(params.par_regions).map { [ [ id: it.simpleName ], it ] }.collect() + : '' + ch_trgt_bed = params.trgt_repeats ? Channel.fromPath(params.trgt_repeats).map { it -> [ it.simpleName, it ] }.collect() + : '' + ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).map { it -> [ it.simpleName, it ] }.collect() + : '' + ch_databases = params.snp_db ? Channel.fromSamplesheet('snp_db', immutable_meta: false).map{ it[1] }.collect() + : '' + ch_variant_consequences_snv = params.variant_consequences_snv ? Channel.fromPath(params.variant_consequences_snv).collect() + : Channel.value([]) + ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [ [ id:'vep_cache' ], it ] }.collect() + : Channel.value([[],[]]) + ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect() + : '' + ch_expected_xy_bed = params.hificnv_xy ? Channel.fromPath(params.hificnv_xy).collect() + : '' + ch_expected_xx_bed = params.hificnv_xx ? Channel.fromPath(params.hificnv_xx).collect() + : '' + ch_exclude_bed = params.hificnv_exclude ? Channel.fromPath(params.hificnv_exclude).collect() + : '' + ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect() + : Channel.value([]) + ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect() + : Channel.value([]) + ch_somalier_sites = params.somalier_sites ? 
Channel.fromPath(params.somalier_sites).map { [ it.simpleName, it ] }.collect() + : '' // Check parameter that doesn't conform to schema validation here - if (params.split_fastq != 0 && (params.split_fastq < 2 || params.split_fastq > 999 )) { exit 1, '--split_fastq must be 0, or between 2 and 999'} - if (params.parallel_snv == 0 ) { exit 1, '--parallel_snv must be > 0'} - - // Create PED from samplesheet - ch_pedfile = ch_input.toList().map { file(CustomFunctions.makePed(it, params.outdir)) } + if (params.phaser.matches('hiphase_sv|hiphase_snv') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" } + + // Read and store paths in the vep_plugin_files file + if (params.vep_plugin_files) { + ch_vep_extra_files_unsplit.splitCsv ( header:true ) + .map { row -> + path = file(row.vep_files[0]) + if(path.isFile() || path.isDirectory()){ + return [path] + } else { + error("\nVep database file ${path} does not exist.") + } + } + .collect() + .set {ch_vep_extra_files} + } // - // Main workflow + // Convert BAM files to FASTQ and vice versa // - BAM_TO_FASTQ ( ch_input ) - ch_versions = ch_versions.mix(BAM_TO_FASTQ.out.versions) - - BAM_TO_FASTQ.out.fastq - .set { ch_sample } + CONVERT_INPUT_FILES ( ch_input ) + ch_versions = ch_versions.mix(CONVERT_INPUT_FILES.out.versions) + // + // Run raw (unaligned) read QC with FastQC and fqcrs + // if(!params.skip_raw_read_qc) { - // Cat samples with multiple input files before QC - still not ideal - ch_sample + // Combine samples with multiple input files before QC - not ideal + CONVERT_INPUT_FILES.out.fastq .groupTuple() .branch { meta, reads -> single: reads.size() == 1 return [ meta, reads[0] ] multiple: reads.size() > 1 } - .set { ch_sample_reads } + .set { ch_fastq } - CAT_FASTQ ( ch_sample_reads.multiple ) + CAT_FASTQ ( ch_fastq.multiple ) ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) - 
ch_sample_reads.single + ch_fastq.single .concat ( CAT_FASTQ.out.reads ) .set { raw_read_qc_in } - FASTQC( raw_read_qc_in ) + FASTQC ( raw_read_qc_in ) ch_versions = ch_versions.mix(FASTQC.out.versions) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - FQCRS( raw_read_qc_in ) + FQCRS ( raw_read_qc_in ) ch_versions = ch_versions.mix(FQCRS.out.versions) } + // + // Prepare references + // if(!params.skip_mapping_wf | !params.skip_assembly_wf ) { - // Index genome - PREPARE_GENOME( ch_fasta, ch_vep_cache_unprocessed ) + + PREPARE_GENOME ( + ch_fasta, + ch_vep_cache_unprocessed, + ) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + if(!params.skip_snv_annotation) { + if (params.vep_cache) { + if (params.vep_cache.endsWith("tar.gz")) { + ch_vep_cache = PREPARE_GENOME.out.vep_resources + } else { + ch_vep_cache = Channel.fromPath(params.vep_cache).collect() + } + } + } + // Gather indices fasta = PREPARE_GENOME.out.fasta fai = PREPARE_GENOME.out.fai mmi = PREPARE_GENOME.out.mmi } - // Move this inside prepare genome? 
- - // If no BED-file is provided then build intervals from reference - if(!params.bed) { - fai - .map{ name, fai -> [['id':name], fai] } - .set{ ch_build_intervals_in } - - BUILD_INTERVALS( ch_build_intervals_in ) - ch_versions = ch_versions.mix(BUILD_INTERVALS.out.versions) - - BUILD_INTERVALS.out.bed - .set{ ch_bed } - } - + // + // (Split input files and), map reads to reference and merge into a single BAM per sample + // if(!params.skip_mapping_wf) { - // Split fastq - if (params.split_fastq > 0) { + // Split input files for alignment + if (params.parallel_alignments > 1) { - FASTP( ch_sample, [], [], [] ) - ch_versions = ch_versions.mix(FASTP.out.versions) + SPLITUBAM ( CONVERT_INPUT_FILES.out.bam ) + ch_versions = ch_versions.mix(SPLITUBAM.out.versions) - reads_for_alignment = FASTP.out.reads.transpose() + reads_for_alignment = SPLITUBAM.out.bam.transpose() } else { - reads_for_alignment = ch_sample + reads_for_alignment = CONVERT_INPUT_FILES.out.bam } - // Align (split) reads MINIMAP2_ALIGN ( reads_for_alignment, mmi, true, 'bai', false, false ) ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) @@ -194,36 +223,89 @@ workflow NALLO { } .set { bam_to_merge } - // Merge files if we have mutiple files per sample - SAMTOOLS_MERGE( bam_to_merge.multiple.map { meta, bam, bai -> [ meta, bam ] }, [[],[]], [[],[]], 'bai' ) + // Merge files if we have multiple files per sample + SAMTOOLS_MERGE ( bam_to_merge.multiple.map { meta, bam, bai -> [ meta, bam ] }, [[],[]], [[],[]], 'bai' ) ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions) // Combine merged with unmerged bams SAMTOOLS_MERGE.out.bam .join(SAMTOOLS_MERGE.out.index) - .concat( bam_to_merge.single ) + .concat(bam_to_merge.single) + .map { meta, bam, bai -> [ meta - meta.subMap('n_files'), bam, bai ] } .set { bam_infer_sex_in } - // Infer sex if sex unknown - BAM_INFER_SEX ( bam_infer_sex_in, fasta, fai, ch_somalier_sites, ch_pedfile ) + // + // Create PED from samplesheet + // + ch_input + .map 
{ meta, files -> [ meta.project, meta ] } + .groupTuple() + .set { ch_samplesheet_ped_in } + + SAMPLESHEET_PED ( ch_samplesheet_ped_in ) + ch_versions = ch_versions.mix(SAMPLESHEET_PED.out.versions) + + SAMPLESHEET_PED.out.ped + .map { project, ped -> [ [ 'id': project ], ped ] } + .collect() + .set { ch_samplesheet_pedfile } + + // + // Check sex and relatedness, and update with infered sex if the sex for a sample is unknown + // + BAM_INFER_SEX ( bam_infer_sex_in, fasta, fai, ch_somalier_sites, ch_samplesheet_pedfile ) + ch_versions = ch_versions.mix(BAM_INFER_SEX.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BAM_INFER_SEX.out.somalier_samples.map{it[1]}.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(BAM_INFER_SEX.out.somalier_pairs.map{it[1]}.collect().ifEmpty([])) - ch_versions = ch_versions.mix(BAM_INFER_SEX.out.versions) bam = BAM_INFER_SEX.out.bam bai = BAM_INFER_SEX.out.bai bam_bai = BAM_INFER_SEX.out.bam_bai - // Only compatible with hg38 (and a few hg19 genes) + // + // Create PED with updated sex + // + bam + .map { meta, files -> [ meta.project, meta ] } + .groupTuple() + .set { ch_somalier_ped_in } + + SOMALIER_PED ( ch_somalier_ped_in ) + ch_versions = ch_versions.mix(SOMALIER_PED.out.versions) + + SOMALIER_PED.out.ped + .map { project, ped -> [ [ 'id': project ], ped ] } + .collect() + .set { ch_updated_pedfile } + + // + // Run aligned read QC with mosdepth and cramino + // + if (!params.skip_aligned_read_qc) { + QC_ALIGNED_READS( bam_bai, fasta, ch_input_bed ) + ch_versions = ch_versions.mix(QC_ALIGNED_READS.out.versions) + + ch_multiqc_files = ch_multiqc_files.mix( QC_ALIGNED_READS.out.mosdepth_summary.collect { it[1] } ) + ch_multiqc_files = ch_multiqc_files.mix( QC_ALIGNED_READS.out.mosdepth_global_dist.collect { it[1] } ) + ch_multiqc_files = ch_multiqc_files.mix( QC_ALIGNED_READS.out.mosdepth_region_dist.collect { it[1] }.ifEmpty([]) ) + } + + // + // Call paralogous genes with paraphase + // 
if(!params.skip_call_paralogs) { CALL_PARALOGS ( bam_bai, fasta ) + ch_versions = ch_versions.mix(CALL_PARALOGS.out.versions) } - // Assembly workflow + // + // Hifiasm assembly and assembly variant calling + // if(!params.skip_assembly_wf) { //Hifiasm assembly - ASSEMBLY( ch_sample ) + ASSEMBLY( CONVERT_INPUT_FILES.out.fastq ) ch_versions = ch_versions.mix(ASSEMBLY.out.versions) // Update assembly variant calling meta with sex from somalier @@ -242,109 +324,182 @@ workflow NALLO { ch_versions = ch_versions.mix(ASSEMBLY_VARIANT_CALLING.out.versions) } - // TODO: parallel_snv should only be allowed when snv calling is active - // TODO: move inside PREPARE GENOME, but only run if(parallel_snv > 1) - // Split BED/Genome into equal chunks - // 13 is a good number since no bin is larger than chr1 & it will not overload SLURM - - SPLIT_BED_CHUNKS(ch_bed, params.parallel_snv) - ch_versions = ch_versions.mix(SPLIT_BED_CHUNKS.out.versions) - - // Combine to create a bam_bai - chunk pair for each sample - // Do this here, pre-process or inside SNV-calling? 
- bam_bai - .combine(SPLIT_BED_CHUNKS.out - .split_beds - .flatten()) - .set{ ch_snv_calling_in } - - QC_ALIGNED_READS( bam_bai, fasta, ch_input_bed ) - ch_versions = ch_versions.mix(QC_ALIGNED_READS.out.versions) - + // // Call SVs with Sniffles2 - STRUCTURAL_VARIANT_CALLING( bam_bai , ch_extra_snfs, fasta, fai, ch_tandem_repeats ) + STRUCTURAL_VARIANT_CALLING( bam_bai, fasta, fai, ch_tandem_repeats ) ch_versions = ch_versions.mix(STRUCTURAL_VARIANT_CALLING.out.versions) + // + // Call (and annotate and rank) SNVs + // if(!params.skip_short_variant_calling) { - // Call SNVs with DeepVariant/DeepTrio - SHORT_VARIANT_CALLING( ch_snv_calling_in , ch_extra_gvcfs, fasta, fai, ch_bed ) + + // + // Make BED intervals, to be used for parallel SNV calling + // + SCATTER_GENOME ( + fai, + ch_input_bed, + !params.bed, + !params.skip_short_variant_calling, + params.parallel_snv + ) + ch_versions = ch_versions.mix(SCATTER_GENOME.out.versions) + + // Combine to create a bam_bai - interval pair for each sample + bam_bai + .combine( SCATTER_GENOME.out.bed_intervals ) + .map { meta, bam, bai, bed, intervals -> + [ meta + [ num_intervals: intervals ], bam, bai, bed ] + } + .set{ ch_snv_calling_in } + + // + // This subworkflow calls SNVs with DeepVariant and outputs: + // 1. A merged and normalised VCF, containing one sample with all regions, to be used in downstream subworkflows requiring SNVs. + // 2. A merged and normalised VCF, containing one region with all samples, to be used in annotation and ranking. 
+ // + SHORT_VARIANT_CALLING( ch_snv_calling_in, fasta, fai, SCATTER_GENOME.out.bed, ch_par ) ch_versions = ch_versions.mix(SHORT_VARIANT_CALLING.out.versions) + // + // Annotate SNVs + // if(!params.skip_snv_annotation) { - def ch_vep_cache - - if (params.vep_cache) { - if (params.vep_cache.endsWith("tar.gz")) { - ch_vep_cache = PREPARE_GENOME.out.vep_resources - } else { - ch_vep_cache = Channel.fromPath(params.vep_cache).collect() - } - } else { - ch_vep_cache = Channel.value([]) - } - + // + // Annotates one multisample VCF per variant call region + // SNV_ANNOTATION( SHORT_VARIANT_CALLING.out.combined_bcf, - SHORT_VARIANT_CALLING.out.snp_calls_vcf, ch_databases, fasta, + fai.map { name, fai -> [ [ id: name ], fai ] }, ch_vep_cache, - params.vep_cache_version + params.vep_cache_version, + ch_vep_extra_files, + (params.cadd_resources && params.cadd_prescored), + ch_cadd_header, + ch_cadd_resources, + ch_cadd_prescored ) ch_versions = ch_versions.mix(SNV_ANNOTATION.out.versions) + + ANN_CSQ_PLI_SNV ( + SNV_ANNOTATION.out.vcf, + ch_variant_consequences_snv + ) + ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions) + + // + // Ranks one multisample VCF per variant call region + // + if(!params.skip_rank_variants) { + // Only run if we have affected individuals + RANK_VARIANTS_SNV ( + ANN_CSQ_PLI_SNV.out.vcf_ann.filter { meta, vcf -> meta.contains_affected }, + ch_updated_pedfile.map { meta, ped -> ped }, + ch_reduced_penetrance, + ch_score_config_snv + ) + ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions) + + // If there are affected individuals and RANK_VARIANTS has been run, + // input that to VCF concatenation + RANK_VARIANTS_SNV.out.vcf + .join( RANK_VARIANTS_SNV.out.tbi ) + .set { ch_vcf_tbi_per_region } + } else { + // otherwise grab the VCF that should have gone into RANK_VARIANTS + ANN_CSQ_PLI_SNV.out.vcf_ann + .join( ANN_CSQ_PLI_SNV.out.tbi_ann ) + .set { ch_vcf_tbi_per_region } + } + } else { + // If neither snv_annotation nor 
rank_variants was run, take the output from + // SHORT_VARIANT_CALLING + SHORT_VARIANT_CALLING.out.combined_bcf + .join( SHORT_VARIANT_CALLING.out.combined_csi ) + .set { ch_vcf_tbi_per_region } } - if(params.preset != 'ONT_R10') { + ch_vcf_tbi_per_region + .map { meta, vcf, tbi -> [ [ id: meta.project ], vcf, tbi ] } + .groupTuple() + .set { ch_bcftools_concat_in } + + // Concat into a multisample VCF with all regions and publish + BCFTOOLS_CONCAT ( ch_bcftools_concat_in ) + ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) + + // Make an echtvar database of all samples + ECHTVAR_ENCODE ( BCFTOOLS_CONCAT.out.vcf ) + ch_versions = ch_versions.mix(ECHTVAR_ENCODE.out.versions) + + // Split multisample VCF to also publish a VCF per sample + BCFTOOLS_PLUGINSPLIT ( BCFTOOLS_CONCAT.out.vcf.join(BCFTOOLS_CONCAT.out.tbi ), [], [], [], [] ) + ch_versions = ch_versions.mix(BCFTOOLS_PLUGINSPLIT.out.versions) + + BCFTOOLS_PLUGINSPLIT.out.vcf + .transpose() + .map { meta, vcf -> [ meta, vcf, [] ] } + .set { ch_bcftools_stats_snv_in } + + BCFTOOLS_STATS ( ch_bcftools_stats_snv_in, [[],[]], [[],[]], [[],[]], [[],[]], [[],[]] ) + ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS.out.stats.collect{it[1]}.ifEmpty([])) + // + // Call CNVs with HiFiCNV + // + if(!params.skip_cnv_calling) { bam_bai .join(SHORT_VARIANT_CALLING.out.snp_calls_vcf) - .groupTuple() .set { cnv_workflow_in } - if(!params.skip_cnv_calling) { - CNV(cnv_workflow_in, fasta, ch_expected_xy_bed, ch_expected_xx_bed, ch_exclude_bed) - ch_versions = ch_versions.mix(CNV.out.versions) - } + CNV(cnv_workflow_in, fasta, ch_expected_xy_bed, ch_expected_xx_bed, ch_exclude_bed) + ch_versions = ch_versions.mix(CNV.out.versions) } - - + // + // Phase SNVs and INDELs + // if(!params.skip_phasing_wf) { - // Phase variants with WhatsHap + PHASING( SHORT_VARIANT_CALLING.out.snp_calls_vcf, STRUCTURAL_VARIANT_CALLING.out.ch_sv_calls_vcf, bam_bai, fasta, fai) 
ch_versions = ch_versions.mix(PHASING.out.versions) - hap_bam_bai = PHASING.out.haplotagged_bam_bai + ch_multiqc_files = ch_multiqc_files.mix(PHASING.out.stats.collect{it[1]}.ifEmpty([])) + // + // Create methylation pileups with modkit + // if(!params.skip_methylation_wf) { - // Pileup methylation with modkit - METHYLATION( hap_bam_bai, fasta, fai, ch_bed ) + + METHYLATION( PHASING.out.haplotagged_bam_bai, fasta, fai, ch_input_bed ) ch_versions = ch_versions.mix(METHYLATION.out.versions) } - if(!params.skip_repeat_wf) { - // Repeat analysis with TRGT + // + // Call repeat expansions with TRGT + // + if(!params.skip_repeat_calling) { - // Hack read names - if (params.preset == "ONT_R10") { - CONVERT_ONT_READ_NAMES(hap_bam_bai) - ch_versions = ch_versions.mix(CONVERT_ONT_READ_NAMES.out.versions) + CALL_REPEAT_EXPANSIONS ( PHASING.out.haplotagged_bam_bai, fasta, fai, ch_trgt_bed ) + ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) - ch_repeat_analysis_in = CONVERT_ONT_READ_NAMES.out.bam_bai - } else { - ch_repeat_analysis_in = hap_bam_bai + // + // Annotate repeat expansions with stranger + // + if(!params.skip_repeat_annotation) { + ANNOTATE_REPEAT_EXPANSIONS ( ch_variant_catalog, CALL_REPEAT_EXPANSIONS.out.vcf ) + ch_versions = ch_versions.mix(ANNOTATE_REPEAT_EXPANSIONS.out.versions) } - - REPEAT_ANALYSIS( ch_repeat_analysis_in, fasta, fai, ch_trgt_bed ) - ch_versions = ch_versions.mix(REPEAT_ANALYSIS.out.versions) } } } } - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}ifEmpty([])) - // // Collate and save software versions //