diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a584..4ecfbfe3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { diff --git a/.editorconfig b/.editorconfig index b78de6e6..b6b31907 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 5ab7fd29..3b7c77be 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,7 +9,9 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/mhcquant then the best place to ask is on the nf-core Slack [#mhcquant](https://nfcore.slack.com/channels/mhcquant) channel ([join our Slack here](https://nf-co.re/join/slack)). +:::info +If you need help using or modifying nf-core/mhcquant then the best place to ask is on the nf-core Slack [#mhcquant](https://nfcore.slack.com/channels/mhcquant) channel ([join our Slack here](https://nf-co.re/join/slack)). +::: ## Contribution workflow @@ -116,4 +118,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 9aed7942..7b4e5554 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,9 +42,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/mhcquant _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index a66a077d..81a85842 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,7 +15,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/mhcq - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/mhcquant/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/mhcquant _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. 
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/mhcquant/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/mhcquant _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 7d4ecaa8..31969965 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,18 +14,26 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 + # TODO nf-core: You can customise AWS full pipeline tests as required + # Add full size test data (but still relatively small datasets for few samples) + # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/mhcquant/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/mhcquant/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_full + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 52b781f2..5b9fa054 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,18 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/mhcquant/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/mhcquant/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index c7b6a4cd..d4730960 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,7 +13,7 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/mhcquant' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/mhcquant ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/mhcquant ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b8ecffb1..f8ad5eb4 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" steps: - name: Check out pipeline code @@ -37,27 +37,28 @@ jobs: - name: Run pipeline with test data run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --spectrum_batch_size 5000 --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - test_additional_params: - name: Run pipeline with additional params - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/mhcquant') }}" + profile: + name: Run profile tests + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/mhcquant') }} runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false strategy: matrix: - # Nextflow versions include: # Test pipeline minimum Nextflow version - - NXF_VER: "22.10.1" + - NXF_VER: "23.04.0" NXF_EDGE: "" # Test latest edge release of Nextflow - NXF_VER: "" NXF_EDGE: "1" + tests: ["test_deeplc", "test_ms2pip", "test_ionannotator", "test_full"] steps: - name: Check out pipeline code uses: actions/checkout@v2 - - name: Install Nextflow env: NXF_VER: ${{ matrix.NXF_VER }} @@ -67,7 +68,6 @@ jobs: run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline with additional params + - name: Run pipeline with profile ${{ matrix.tests }} run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --predict_class_1 --predict_class_2 --predict_RT --spectrum_batch_size 2000 --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.tests }},docker --max_memory '6.GB' --max_cpus 2 --spectrum_batch_size 5000 --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..694e90ec --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v7 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
+          days-before-stale: 30
+          days-before-close: 20
+          days-before-pr-close: -1
+          any-of-labels: "awaiting-changes,awaiting-feedback"
+          exempt-issue-labels: "WIP"
+          exempt-pr-labels: "WIP"
+          repo-token: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 858d622e..b8bdd214 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -78,7 +78,7 @@ jobs:
       - uses: actions/setup-python@v4
         with:
-          python-version: "3.7"
+          python-version: "3.11"
           architecture: "x64"
       - name: Install dependencies
diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcments.yml
new file mode 100644
index 00000000..6ad33927
--- /dev/null
+++ b/.github/workflows/release-announcments.yml
@@ -0,0 +1,68 @@
+name: release-announcements
+# Automatic release toot and tweet announcements
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+jobs:
+  toot:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: rzr/fediverse-action@master
+        with:
+          access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
+          host: "mstdn.science" # custom host if not "mastodon.social" (default)
+          # GitHub event payload
+          # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release
+          message: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+
+  send-tweet:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: Install dependencies
+        run: pip install tweepy==4.14.0
+      - name: Send tweet
+        shell: python
+        run: |
+          import os
+          import tweepy
+
+          client = tweepy.Client(
+              access_token=os.getenv("TWITTER_ACCESS_TOKEN"),
+              access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"),
+              consumer_key=os.getenv("TWITTER_CONSUMER_KEY"),
+              consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"),
+          )
+          tweet = os.getenv("TWEET")
+          client.create_tweet(text=tweet)
+        env:
+          TWEET: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+          TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
+          TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
+          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
+          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
+
+  bsky-post:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: zentered/bluesky-post-action@v0.0.2
+        with:
+          post: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+        env:
+          BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }}
+          BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }}
+          #
diff --git a/.gitpod.yml b/.gitpod.yml
index 22fdb455..1c67b5ea 100644
--- a/.gitpod.yml
+++ b/.gitpod.yml
@@ -1,4 +1,9 @@
 image: nfcore/gitpod:latest
+tasks:
+  - name: Update Nextflow and setup pre-commit
+    command: |
+      pre-commit install --install-hooks
+      nextflow self-update
 vscode:
   extensions: # based on nf-core.nf-core-extensionpack
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..0c31cdb9
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,5 @@
+repos:
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: "v2.7.1"
+    hooks:
+      - id: prettier
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 15d3e7cd..3232c035 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,35 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## v2.4.1 nfcore/mhcquant "Maroon Gold Boxer" (patch) - 2023/04/04
+## v2.5.0 - nfcore/mhcquant "Angry Bird" - 2023/10/09
+
+### `Added`
+
+- Support for Bruker's tdf format by adding the tdf2mzml converter [#263](https://github.com/nf-core/mhcquant/issues/263)
+- DeepLC retention time prediction
+- MS2PIP peak intensity prediction
+- Added OpenMS FileFilter to clean mzML after parsing to remove artifacts like empty spectra or precursors with charge 0 (optional)
+- Made file extension check case-insensitive
+- Added option to provide a default Comet parameters file
+- Optimized resource allocations
+- Template update 2.9 [#274](https://github.com/nf-core/mhcquant/pull/274)
+- Improved quantification such that merged FDR-filtered runs can be quantified properly
+- Template update 2.10 [#282](https://github.com/nf-core/mhcquant/pull/282)
+
+### `Fixed`
+
+- [#266](https://github.com/nf-core/mhcquant/pull/266) New OpenMS version 3.0.0 fixes duplicated ID bug [#250](https://github.com/nf-core/mhcquant/issues/250)
+
+### `Dependencies`
+
+- [#266](https://github.com/nf-core/mhcquant/pull/266) Switched from OpenMS version 2.8.0 to newest version 3.0.0 [#265](https://github.com/nf-core/mhcquant/issues/265)
+- [#266](https://github.com/nf-core/mhcquant/pull/266) Bumped ThermoRawFileParser version from 1.4.0 to 1.4.2
+
+### `Deprecated`
+
+- OpenMS RT prediction
+
+## v2.4.1 nfcore/mhcquant "Young Shark" (patch) - 2023/04/04

 ### `Added`
diff --git a/CITATIONS.md b/CITATIONS.md
index 844b5077..64807a32 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -26,9 +26,8 @@
 > Röst H, Sachsenberg T, Aiche S, Bielow C, Weisser H, Aicheler F, Andreotti S, Ehrlich HC, Gutenbrunner P, Kenar E, Liang X, Nahnsen S, Nilse L, Pfeuffer J, Rosenberger G, Rurik M, Schmitt U, Veit J, Walzer M, Wojnar D, Wolski WE, Schilling O, Choudhary JS, Malmström L, Aebersold R, Reinert K, Kohlbacher O. OpenMS: a flexible open-source software platform for mass spectrometry data analysis. Nat Methods 13 741–748 (2016). doi: 10.1038/nmeth.3959. PubMed PMID: 27575624

-- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
-
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
+  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
## Software packaging/containerisation tools @@ -47,5 +46,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f..c089ec78 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. 
-We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. 
This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. 
Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. 
-You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. 
+## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. 
diff --git a/LICENSE b/LICENSE index 43eea689..fa6e5d35 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Leon Bichmann, Marissa Dubbelaar +Copyright (c) Leon Bichmann, Marissa Dubbelaar, Jonas Scheid, Steffen Lemke Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 9e7efe92..679be851 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,15 @@ # ![nf-core/mhcquant](docs/images/nf-core-mhcquant_logo_light.png#gh-light-mode-only) ![nf-core/mhcquant](docs/images/nf-core-mhcquant_logo_dark.png#gh-dark-mode-only) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mhcquant/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1569909-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1569909) +[![GitHub Actions CI Status](https://github.com/nf-core/mhcquant/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/mhcquant/actions?query=workflow%3A%22nf-core+CI%22) +[![GitHub Actions Linting Status](https://github.com/nf-core/mhcquant/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/mhcquant/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mhcquant/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/mhcquant) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23mhcquant-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/mhcquant)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23mhcquant-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/mhcquant)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -22,38 +23,40 @@ The pipeline is built using 
[Nextflow](https://www.nextflow.io), a workflow tool On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/mhcquant/results). -![overview](assets/mhcquant_web.png) +![overview](docs/images/mhcquant_subway.png) -## Quick Start +## Usage -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`) +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how +> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) +> with `-profile test` before running the workflow on actual data. -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. +First, prepare a samplesheet with your input data that looks as follows: -3. Download the pipeline and test it on a minimal dataset with a single command: +`samplesheet.tsv`: - ```bash - nextflow run nf-core/mhcquant -profile test,YOURPROFILE --outdir - ``` +```tsv +ID Sample Condition ReplicateFileName +1 msrun tumor /path/to/msrun.raw|mzML|d +``` - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +Each row represents a mass spectrometry run in one of the formats: raw, mzML, d - > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. 
- > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +Now, you can run the pipeline using: -4. Start running your own analysis! +```bash +nextflow run nf-core/mhcquant + -profile \ + --input 'samples.tsv' \ + --fasta 'SWISSPROT_2020.fasta' \ + --outdir ./results +``` - ```bash - nextflow run nf-core/mhcquant -profile test, \ - --input 'samples.tsv' \ - --fasta 'SWISSPROT_2020.fasta' \ - --allele_sheet 'alleles.tsv' \ - --predict_class_1 \ - --refine_fdr_on_predicted_subset \ - --outdir ./results - ``` +> [!NOTE] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). + +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/mhcquant/usage) and the [parameter documentation](https://nf-co.re/mhcquant/parameters). ## Pipeline summary @@ -61,15 +64,34 @@ On release, automated continuous integration tests run the pipeline on a full-si By default the pipeline currently performs the following -- Identification of peptides in the MS/MS spectra using comet (`CometAdapter`) +#### Identification + +- Identification of peptides in the MS/MS spectra using Comet (`CometAdapter`) - Refreshes the protein references for all peptide hits and adds target/decoy information (`PeptideIndexer`) -- Estimates the false discovery rate on peptide and protein level (`FalseDiscoveryRate`) - Filters peptide/protein identification results on ID based alignment (`IDFilter`) -- Converts XML format to text files (`TextExporter`) -- Merges several idXML files into one idXML file (`IDMerger`) -- Extract PSM features for Percolator (`PSMFeatureExtractor`) +- Merges idXML files of a sample-condition group into one idXML file (`IDMerger`) +- Defines extra features for Percolator (`PSMFeatureExtractor`) - Facilitates the input to, the call of and output integration of Percolator (`PercolatorAdapter`) -- Filters peptide/protein identification result (`IDFilter`) +- Filters peptide/protein identification result based on Percolator q-value (`IDFilter`) +- Splits merged idXML file into their respective runs again (`IDRipper`) +- Uses Comet XCorr instead of percolator q-value as primary score for downstream purposess (`IDScoreSwitcher`) +- Keeps peptides observed after FDR filtering in each run and selects the best peptide per run (`Pyopenms_IDFilter`) + +#### Map alignment + +- Corrects retention time distortions between runs, using information from peptides identified in different runs (`MapAlignerIdentification`) +- Applies retention time transformations to runs (`MapRTTransformer`) + +#### Process features + +- Detects features in MS1 data based on peptide identifications (`FeatureFinderIdentification`) +- Group corresponding features across labelfree experiments (`FeatureLinkerUnlabeledKD`) +- Resolves ambiguous annotations of features with peptide identifications (`IDConflictResolver`) + +#### Output + +- Converts XML format to text files (`TextExporter`) +- Converts XML format to mzTab files (`MzTabExporter`) ### Additional Steps @@ -80,12 +102,17 @@ Additional functionality contained by the pipeline currently includes: - Inclusion of proteins in the 
reference database (`mhcnuggets`, `mhcflurry`, `fred2`) - Create a decoy peptide database from standard FASTA databases (`DecoyDatabase`) - Conversion of raw to mzML files (`ThermoRawFileParser`) +- Conversion of tdf (`.d`) to mzML files (`tdf2mzml`) - Executing the peak picking with high_res algorithm (`PeakPickerHiRes`) -#### Map alignment +#### Additional features for rescoring + +- Retention time prediction (`DeepLC`) +- Peak intensity prediction (`MS2PIP`) -- Corrects retention time distortions between maps, using information from peptides identified in different maps (`MapAlignerIdentification`) -- Applies retention time transformations to maps (`MapRTTransformer`) +> [!WARNING] +> The refine FDR feature will be evaluated on a large benchmark dataset in the following releases. +> Consider it as an experimental feature. #### Refine FDR @@ -93,13 +120,8 @@ Additional functionality contained by the pipeline currently includes: - Predict psm results using mhcflurry to shrink search space (`mhcflurry`) - Facilitates the input to, the call of and output integration of Percolator (`PercolatorAdapter`) -#### Process features - -- Detects features in MS1 data based on peptide identifications (`FeatureFinderIdentification`) -- Group corresponding features across labelfree experiments (`FeatureLinkerUnlabeledKD`) -- Resolves ambiguous annotations of features with peptide identifications (`IDConflictResolver`) -- Converts XML format to text files (`TextExporter`) -- Annotates final list of peptides with their respective ions and charges (`IonAnnotator`) +> [!WARNING] +> The HLA prediction feature is outdated and will be reworked in the following releases #### Prediction of HLA class 1 peptides @@ -108,14 +130,15 @@ Additional functionality contained by the pipeline currently includes: - Predict neoepitopes based on the peptide hits (`mhcnuggets`, `mhcflurry`, `fred2`) - Resolve found neoepitopes (`mhcnuggets`, `mhcflurry`, `fred2`) -#### Prediction retention time +#### Output -- Used to train a model for peptide retention time prediction or peptide separation prediction (`RTModel`) -- Retention Times Predictor Found Peptides and neoepitopes (`RTPredict`) +- Annotates final list of peptides with their respective ions and charges (`IonAnnotator`) ## Documentation -The nf-core/mhcquant pipeline comes with documentation about the pipeline [usage](https://nf-co.re/mhcquant/usage), [parameters](https://nf-co.re/mhcquant/parameters) and [output](https://nf-co.re/mhcquant/output). +To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/mhcquant/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/mhcquant/output). 1. [Nextflow installation](https://nf-co.re/usage/installation) 2. Pipeline configuration @@ -127,7 +150,7 @@ The nf-core/mhcquant pipeline comes with documentation about the pipeline [usage ## Credits -nf-core/mhcquant was originally written by [Leon Bichmann](https://github.com/Leon-Bichmann) from the [Kohlbacher Lab](https://kohlbacherlab.org/). 
The pipeline was re-written in Nextflow DSL2 and is primarily maintained by [Marissa Dubbelaar](https://github.com/marissaDubbelaar) from [Clinical Collaboration Unit Translational Immunology](https://www.medizin.uni-tuebingen.de/en-de/das-klinikum/einrichtungen/kliniken/medizinische-klinik/kke-translationale-immunologie) and [Quantitative Biology Center](https://uni-tuebingen.de/forschung/forschungsinfrastruktur/zentrum-fuer-quantitative-biologie-qbic/) in Tübingen. +nf-core/mhcquant was originally written by [Leon Bichmann](https://github.com/Leon-Bichmann) from the [Kohlbacher Lab](https://kohlbacherlab.org/). The pipeline was re-written in Nextflow DSL2 and is primarily maintained by [Marissa Dubbelaar](https://github.com/marissaDubbelaar) and [Jonas Scheid](https://github.com/jonasscheid) from [Peptide-based Immunotherapy](https://www.medizin.uni-tuebingen.de/en-de/peptid-basierte-immuntherapie) and [Quantitative Biology Center](https://uni-tuebingen.de/forschung/forschungsinfrastruktur/zentrum-fuer-quantitative-biologie-qbic/) in Tübingen. Helpful contributors: @@ -142,7 +165,7 @@ Helpful contributors: - [Christian Fufezan](https://github.com/fu) - [Sven Fillinger](https://github.com/sven1103) - [Kevin Menden](https://github.com/KevinMenden) -- [Jonas Scheid](https://github.com/jonasscheid) +- [Steffen Lemke](https://github.com/steffenlem) ## Contributions and Support @@ -184,6 +207,14 @@ In addition, references of tools and data used in this pipeline are as follows: > > Käll L. et al, _Nat Methods_ 2007 Nov;4(11):923-5. doi: [10.1038/nmeth1113](https://www.nature.com/articles/nmeth1113). Epub 2007 Oct 21. > +> **Retention time prediction** +> +> Bouwmeester R. et al, _Nature Methods_ 2021 Oct;18(11):1363-1369. doi: [10.1038/s41592-021-01301-5](https://www.nature.com/articles/s41592-021-01301-5) +> +> **MS2 Peak intensity prediction** +> +> Gabriels R. et al, _Nucleic Acids Research_ 2019 Jul;47(W1):W295-9. doi: [10.1093/nar/gkz299](https://academic.oup.com/nar/article/47/W1/W295/5480903) +> > **Identification based RT Alignment** > > Weisser H. et al, _J Proteome Res._ 2013 Apr 5;12(4):1628-44. doi: [10.1021/pr300992u](https://pubs.acs.org/doi/10.1021/pr300992u). Epub 2013 Feb 22. diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index ec4f5008..16222e57 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -6,17 +6,18 @@ plot_type: "html" ## You inject any metadata in the Nextflow '${workflow}' object data: |

  <h4>Methods</h4>
- <p>Data was processed using nf-core/mhcquant v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).</p>
+ <p>Data was processed using nf-core/mhcquant v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
  <pre><code>${workflow.commandLine}</code></pre>
+ <p>${tool_citations}</p>
  <h4>References</h4>
  <ul>
    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
    <li>Bichmann, L., Nelde, A., Ghosh, M., Heumos, L., Mohr, C., Peltzer, A., Kuchenbecker, L., Sachsenberg, T., Walz, J.S., Stevanović, S., & Kohlbacher, O. (2019). MHCquant: Automated and Reproducible Data Analysis for Immunopeptidomics. Journal of Proteome Research, 18(11), 3876-3884. https://doi.org/10.1021/acs.jproteome.9b00313</li>
    <li>O'Donnell, T.J., Rubinsteyn, A., Bonsack, M., Riemer, A.B., Laserson, U., & Hammerbacher, J. (2018). MHCflurry: Open-Source Class I MHC Binding Affinity Prediction. Cell Systems, 7(1), 129-132.e4. https://doi.org/10.1016/j.cels.2018.05.014</li>
    <li>Shao, X.M., Bhattacharya, R., Huang, J., Sivakumar, A., Tokheim, C., Zheng, L., Hirsch, D., Kaminow, B., Omdahl, A., Bonsack, M., Riemer, A.B., Velculescu, V.E., Anagnostou, V., Pagel, K.A., & Karchin, R. (2018). High-Throughput Prediction of MHC Class I and II Neoantigens with MHCnuggets. Cancer Immunology Research, 8(3), 396-408. https://doi.org/10.1158/2326-6066.CIR-19-0464</li>
    <li>Weisser, H., & Choudhary, J.S. (2017). Targeted Feature Detection for Data-Dependent Shotgun Proteomics. Journal of Proteome Research, 16(8), 2964-2974. https://doi.org/10.1021/acs.jproteome.7b00248</li>
    <li>Schubert, B., Walzer, M., Brachvogel, H-P., Szolek, A., Mohr, C., & Kohlbacher, O. (2016). FRED 2: an immunoinformatics framework for Python. Bioinformatics, 32(13), 2044-6. https://doi.org/10.1093/bioinformatics/btw113</li>
    <li>Eng, J.K., Hoopmann, M.R., Jahan, T.A., Egertson, J.D., Noble, W.S., & MacCoss, M.J. (2015). A Deeper Look into Comet—Implementation and Features. Journal of the American Society for Mass Spectrometry, 26(11), 1865-74. https://doi.org/10.1007/s13361-015-1179-x</li>
    <li>Weisser, H., Nahnsen, S., Grossmann, J., Nilse, L., Quandt, A., Brauer, H., Sturm, M., Kenar, E., Kohlbacher, O., Aebersold, R., & Malmström, L. (2013). An Automated Pipeline for High-Throughput Label-Free Quantitative Proteomics. Journal of Proteome Research, 12(4), 1628-44. https://doi.org/10.1021/pr300992u</li>
  </ul>
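The `${...}` tokens in the `data:` block above are not filled in by MultiQC itself; they are substituted with values from the Nextflow `${workflow}` object before the HTML is handed to MultiQC. As a rough illustration of the substitution semantics only (the pipeline performs this step in Groovy rather than Python, and all values below are hypothetical examples):

```python
# Illustrative sketch only: a stand-in for the Groovy template engine that
# fills the methods-description placeholders at runtime. All values are
# hypothetical examples, not pipeline output.
placeholders = {
    "${workflow.manifest.version}": "2.5.0",
    "${workflow.nextflow.version}": "23.04.0",
    "${workflow.commandLine}": "nextflow run nf-core/mhcquant -profile test,docker --outdir results",
    "${doi_text}": "(doi: 10.5281/zenodo.1569909)",
    "${tool_citations}": "Tools used in the workflow included: Comet, Percolator, ...",
}


def render_methods_html(template_html: str) -> str:
    """Replace every known ${...} placeholder with its runtime value."""
    for token, value in placeholders.items():
        template_html = template_html.replace(token, value)
    return template_html
```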
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 43bfdcf6..04621a17 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,11 @@
+custom_logo: "nf-core-mhcquant_logo_light.png"
+custom_logo_url: https://github.com/nf-core/mhcquant
+custom_logo_title: "nf-core/mhcquant"
+
 report_comment: >
-  This report has been generated by the nf-core/mhcquant
+  This report has been generated by the nf-core/mhcquant
   analysis pipeline. For information about how to interpret these results, please see the
-  documentation.
+  documentation.
 report_section_order:
   "nf-core-mhcquant-methods-description":
     order: -1000
diff --git a/assets/nf-core-mhcquant_logo_light.png b/assets/nf-core-mhcquant_logo_light.png
index c470ddb2..6c1dcf66 100644
Binary files a/assets/nf-core-mhcquant_logo_light.png and b/assets/nf-core-mhcquant_logo_light.png differ
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 0d92ade2..6df68dfd 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -23,15 +23,19 @@
         },
         "ReplicateFileName": {
             "type": "string",
-            "errorMessage": "MS file cannot contain spaces and must have extension '.raw' or '.mzml'",
+            "errorMessage": "MS file cannot contain spaces and must have extension '.raw'|'.RAW', '.mzml'|'.mzML', or '.d'",
             "anyOf": [
                 {
                     "type": "string",
-                    "pattern": "^\\S+-?\\.raw$"
+                    "pattern": "^(?i)\\S+-?\\.raw$"
                 },
                 {
                     "type": "string",
-                    "pattern": "^\\S+-?\\.mzml$"
+                    "pattern": "^(?i)\\S+-?\\.mzML$"
+                },
+                {
+                    "type": "string",
+                    "pattern": "^\\S+-?\\.d$"
                 }
             ]
         }
diff --git a/assets/slackreport.json b/assets/slackreport.json
index 043d02f2..7d7b07dc 100644
--- a/assets/slackreport.json
+++ b/assets/slackreport.json
@@ -3,7 +3,7 @@
     {
         "fallback": "Plain-text summary of the attachment.",
         "color": "<% if (success) { %>good<% } else { %>danger<%} %>",
-        "author_name": "sanger-tol/readmapping v${version} - ${runName}",
+        "author_name": "nf-core/mhcquant v${version} - ${runName}",
         "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico",
         "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>",
         "fields": [
diff --git a/bin/IDFilter.py b/bin/IDFilter.py
new file mode 100755
index 00000000..47d6db80
--- /dev/null
+++ b/bin/IDFilter.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+# Written by Jonas Scheid under the MIT license
+
+from pyopenms import *
+import pandas as pd
+import os
+import argparse
+
+
+def parse_args() -> argparse.Namespace:
+    """
+    Parse command line arguments.
+
+    :return: parsed arguments
+    :rtype: argparse.Namespace
+    """
+    parser = argparse.ArgumentParser(description="Filter idXML by a given whitelist of peptides.")
+    parser.add_argument("--input", required=True, type=str, help="Input idXML file.")
+    parser.add_argument(
+        "--whitelist", required=True, type=str, help="IdXML file, which peptide IDs are used as whitelist filter."
+    )
+    parser.add_argument("--output", required=True, type=str, help="Filtered idXML file.")
+
+    return parser.parse_args()
+
+
+def parse_idxml(path: str) -> tuple[list, list]:
+    """
+    Parse idXML file and return PeptideIdentification and ProteinIdentification objects.
+ + :param path: path to idXML file + :type path: str + :return: ProteinIdentification and PeptideIdentification objects + :rtype: (list, list) + """ + protein_ids = [] + peptide_ids = [] + IdXMLFile().load(path, protein_ids, peptide_ids) + + return protein_ids, peptide_ids + + +def filter_run(protein_ids, peptide_ids, whitelist) -> tuple[list, list]: + """ + Filter Protein and PeptideIdentifications of one run by a whitelist of PeptideIdentifications. + + :param protein_ids: ProteinIdentification objects + :type protein_ids: list + :param peptide_ids: PeptideIdentification objects + :type peptide_ids: list + :param whitelist: PeptideIdentification objects to keep in the run + :type whitelist: list + """ + filter = IDFilter() + ids_to_keep = [ + peptide_id + for peptide_id in peptide_ids + for hit in peptide_id.getHits() + if hit.getSequence().toString() in whitelist + ] + filter.keepPeptidesWithMatchingSequences(peptide_ids, ids_to_keep, ignore_mods=False) + # We only want to have unique peptide sequences + filter.keepBestPerPeptide(peptide_ids, ignore_mods=False, ignore_charges=False, nr_best_spectrum=1) + filter.removeEmptyIdentifications(peptide_ids) + # We only want to have protein accessions that are referenced by the fdr-filtered peptide hits + filter.removeUnreferencedProteins(protein_ids, peptide_ids) + + return protein_ids, peptide_ids + + +def main(): + args = parse_args() + + # Read idXML files of runs + protein_ids, peptide_ids = parse_idxml(args.input) + + # Read file containing peptides to keep + whitelist_protein_ids, whitelist_peptide_ids = parse_idxml(args.whitelist) + # Get string representation of peptide sequences in fdr_filtered_peptides + whitelist_peptides = [hit.getSequence().toString() for id in whitelist_peptide_ids for hit in id.getHits()] + + # Filter runs for peptides only in the fdr_filtered_peptides list + protein_id_filtered, peptide_ids_filtered = filter_run(protein_ids, peptide_ids, whitelist_peptides) + + # Write filtered run to idXML file + IdXMLFile().store(args.output, protein_id_filtered, peptide_ids_filtered) + + +if __name__ == "__main__": + main() diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 9146885b..9154eb41 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -25,13 +25,15 @@ class RowChecker: """ - VALID_FORMATS = ( - ".raw", - ".mzML", - ) + VALID_FORMATS = (".raw", ".mzml", ".d") def __init__( - self, id_col="ID", sample_col="Sample", condition_col="Condition", filename_col="ReplicateFileName", **kwargs + self, + id_col="ID", + sample_col="Sample", + condition_col="Condition", + filename_col="ReplicateFileName", + **kwargs, ): """ Initialize the row checker with the expected column names. 
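For orientation, the new bin/IDFilter.py introduced above loads a run and a whitelist idXML, keeps only peptide hits whose sequences occur in the whitelist, collapses them to the single best hit per unique (modified) sequence, and drops protein entries that no longer have any referencing peptide. A typical invocation (file names are illustrative) might look like:

    IDFilter.py --input run1.idXML --whitelist fdr_filtered.idXML --output run1_filtered.idXML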
@@ -91,7 +93,7 @@ def _validate_filename(self, row):
     def _validate_ms_format(self, filename):
         """Assert that a given filename has one of the expected MS extensions."""
-        assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), (
+        assert any(filename.lower().endswith(extension) for extension in self.VALID_FORMATS), (
             f"The file has an unrecognized extension: {filename}\n"
             f"It should be one of: {', '.join(self.VALID_FORMATS)}"
         )
@@ -141,9 +143,6 @@ def sniff_format(handle):
     peek = read_head(handle)
     handle.seek(0)
     sniffer = csv.Sniffer()
-    if not sniffer.has_header(peek):
-        logger.critical("The given sample sheet does not appear to contain a header.")
-        sys.exit(1)
     dialect = sniffer.sniff(peek)
     return dialect
diff --git a/bin/deeplc_cli.py b/bin/deeplc_cli.py
new file mode 100755
index 00000000..0ab42681
--- /dev/null
+++ b/bin/deeplc_cli.py
@@ -0,0 +1,418 @@
+#!/usr/bin/env python
+# Written by Jonas Scheid and Steffen Lemke
+
+import click
+import logging
+import math
+import os
+import pandas as pd
+import numpy as np
+import sys
+import tensorflow as tf
+from deeplc import DeepLC
+from pyopenms import IdXMLFile
+from sklearn.preprocessing import MinMaxScaler
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # Set TensorFlow logging level to suppress warnings
+tf.get_logger().setLevel(logging.ERROR)  # Filter out specific warnings
+
+# initiate logger
+console = logging.StreamHandler()
+formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+console.setFormatter(formatter)
+LOG = logging.getLogger("DeepLC prediction")
+LOG.addHandler(console)
+LOG.setLevel(logging.INFO)
+
+
+def parse_idxml(path: str) -> tuple[list, list]:
+    """
+    Parse idXML file and return PeptideIdentification and ProteinIdentification objects.
+
+    :param path: path to idXML file
+    :type path: str
+    :return: ProteinIdentification and PeptideIdentification objects
+    :rtype: (list, list)
+    """
+    protein_ids = []
+    peptide_ids = []
+    IdXMLFile().load(path, protein_ids, peptide_ids)
+
+    return protein_ids, peptide_ids
+
+
+def generate_deeplc_input(peptide_ids: list) -> pd.DataFrame:
+    """
+    Generate input for DeepLC from PeptideIdentification objects.
+
+    :param peptide_ids: list of PeptideIdentification objects
+    :type peptide_ids: list
+    :return: Pandas DataFrame containing the input for DeepLC
+    :rtype: pd.DataFrame
+
+    """
+    data = []
+    for peptide_id in peptide_ids:
+        tr = peptide_id.getRT()
+        scan_id = peptide_id.getMetaValue("spectrum_reference")
+        for hit in peptide_id.getHits():
+            sequence = hit.getSequence()
+            unmodified_sequence = sequence.toUnmodifiedString()
+            x_corr = hit.getMetaValue("MS:1002252")
+            target_decoy = hit.getMetaValue("target_decoy")
+
+            # get all modifications
+            hit_mods = []
+            for pos in range(0, sequence.size()):
+                residue = sequence.getResidue(pos)
+                if residue.isModified():
+                    hit_mods.append("|".join([str(pos + 1), residue.getModificationName()]))
+            if hit_mods == []:
+                modifications = ""
+            else:
+                modifications = "|".join(hit_mods)
+
+            data.append([unmodified_sequence, modifications, tr, x_corr, target_decoy, str(sequence), scan_id])
+
+    df_deeplc_input = pd.DataFrame(
+        data, columns=["seq", "modifications", "tr", "x_corr", "target_decoy", "seq_with_mods", "scan_id"]
+    )
+
+    return df_deeplc_input
+
+
+def generate_calibration_df(df: pd.DataFrame, num_bins: int) -> pd.DataFrame:
+    """
+    Generates a pandas DataFrame containing calibration peptides for DeepLC.
+    The input DataFrame is sorted by measured retention time and sliced into
+    bins of equal peptide count. For each bin, the peptide with the highest
+    x_corr value is selected and returned in a pandas DataFrame
+
+    :param df: Input DataFrame with retention time of each peptide and xcorr score
+    :type df: pd.DataFrame
+    :param num_bins: Number of bins/number of calibration peptides to be extracted
+    :type num_bins: int
+    :return: Pandas DataFrame containing calibration peptides equal to index-based num_bins
+    :rtype: pd.DataFrame
+
+    """
+    # remove decoys
+    df = df[df["target_decoy"] != "decoy"]
+
+    # Compute the bin size based on the number of bins
+    bin_size = len(df) // num_bins
+
+    # Sort the dataframe by tr values
+    sorted_df = df.sort_values("tr")
+
+    # Rows for dataframe
+    filtered_row = []
+
+    # Iterate over the bins
+    for i in range(num_bins):
+        # Get the start and end indices of the current bin
+        start_index = i * bin_size
+        end_index = start_index + bin_size
+
+        # Get the subset of the dataframe for the current bin
+        bin_df = sorted_df.iloc[start_index:end_index]
+
+        # Find the row with the maximum x_corr value in the current bin
+        max_row = bin_df.loc[bin_df["x_corr"].idxmax()]
+
+        # Append the max row to the filtered dataframe
+        filtered_row.append(max_row)
+
+    # Create DataFrame
+    calibration_df = pd.DataFrame(filtered_row)
+
+    return calibration_df.copy()
+
+
+def generate_calibration_df_with_RT_bins(df: pd.DataFrame, num_bins: int) -> pd.DataFrame:
+    """
+    Generates a pandas DataFrame containing calibration peptides for DeepLC.
+    The input DataFrame is sorted by measured retention time and sliced into bins of equal retention time.
+    For each bin, the peptide with the highest x_corr value is selected and returned in a pandas DataFrame
+
+    :param df: Input DataFrame with retention time of each peptide and xcorr score
+    :type df: pd.DataFrame
+    :param num_bins: Number of bins/number of calibration peptides to be extracted
+    :type num_bins: int
+    :return: Pandas DataFrame containing calibration peptides equal to RT-based num_bins
+    :rtype: pd.DataFrame
+    """
+    # remove decoys
+    df = df[df["target_decoy"] != "decoy"]
+
+    # Sort the dataframe by tr values
+    sorted_df = df.sort_values("tr")
+
+    # Create list of linear bins between min and max tr with num_bins and access dataframe with index
+    bin_size = (sorted_df["tr"].max() - sorted_df["tr"].min()) / num_bins
+
+    # Rows for dataframe
+    filtered_row = []
+
+    # Iterate over the bins
+    for i in range(num_bins):
+        # Get the start and end indices of the current bin
+        start_tr = sorted_df["tr"].min() + i * bin_size
+        end_tr = start_tr + bin_size
+
+        # Get the subset of the dataframe for the current bin
+        bin_df = sorted_df[(sorted_df["tr"] >= start_tr) & (sorted_df["tr"] < end_tr)]
+
+        # skip if bin is empty (no measurements in RT bin)
+        if len(bin_df) == 0:
+            continue
+
+        # Find the row with the maximum x_corr value in the current bin
+        max_row = bin_df.loc[bin_df["x_corr"].idxmax()]
+
+        # Append the max row to the filtered dataframe
+        filtered_row.append(max_row)
+
+    # Create DataFrame
+    calibration_df = pd.DataFrame(filtered_row)
+
+    return calibration_df
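Both calibration helpers implement the same strategy: partition the PSMs along the retention-time axis and keep only the most confident identification (highest Comet x_corr) per bin as a calibrant. A minimal sketch of the index-bin variant on toy data (all values illustrative):

    import pandas as pd

    # Toy PSM table: six target PSMs with measured RT (tr) and Comet x_corr scores
    psms = pd.DataFrame({
        "seq": ["PEPA", "PEPB", "PEPC", "PEPD", "PEPE", "PEPF"],
        "tr": [120.0, 150.0, 400.0, 430.0, 700.0, 720.0],
        "x_corr": [1.2, 2.5, 3.1, 0.9, 1.8, 2.2],
    })

    num_bins = 3
    psms = psms.sort_values("tr")
    bin_size = len(psms) // num_bins  # two PSMs per index bin

    # One calibrant per bin: the PSM with the highest x_corr in that slice
    calibrants = pd.concat(
        psms.iloc[i * bin_size : (i + 1) * bin_size].nlargest(1, "x_corr")
        for i in range(num_bins)
    )
    # -> PEPB, PEPC, PEPF

The RT-bin variant differs only in that the slices cover equal spans of retention time rather than equal PSM counts, so sparsely populated stretches of the gradient can yield empty bins, which are skipped.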
+
+
+def min_max_scaler(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Scales the predicted retention time values of the input DataFrame to the range of the measured retention time values
+
+    :param df: Input DataFrame with predicted retention time values
+    :type df: pd.DataFrame
+    :return: DataFrame with scaled predicted retention time values
+    :rtype: pd.DataFrame
+    """
+    scaler = MinMaxScaler((min(df["tr"]), max(df["tr"])))
+    df["predicted_RT"] = scaler.fit_transform(df[["predicted_RT"]])
+
+    return df
+
+
+def run_deeplc(df: pd.DataFrame, calibration_df: pd.DataFrame = None) -> pd.DataFrame:
+    dlc = DeepLC()
+    if calibration_df is not None:
+        dlc.calibrate_preds(seq_df=calibration_df)
+        preds = dlc.make_preds(seq_df=df)
+        df["predicted_RT"] = preds
+    else:
+        preds = dlc.make_preds(seq_df=df, calibrate=False)
+        df["predicted_RT"] = preds
+        df = min_max_scaler(df)
+
+    return df
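When no calibrants are available, the min/max fallback simply stretches DeepLC's raw predictions onto the span of the measured retention times. In isolation (toy values):

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    df = pd.DataFrame({"tr": [300.0, 900.0, 1500.0],          # measured RTs
                       "predicted_RT": [0.12, 0.55, 0.98]})   # uncalibrated predictions
    scaler = MinMaxScaler((df["tr"].min(), df["tr"].max()))   # target range = measured RT span
    df["predicted_RT"] = scaler.fit_transform(df[["predicted_RT"]])
    # predicted_RT is now [300.0, 900.0, 1500.0]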
+
+
+def add_rt_error(
+    peptide_ids: list,
+    prediction_dict: dict,
+    add_abs_rt_error: bool = False,
+    add_sqr_rt_error: bool = False,
+    add_log_rt_error: bool = False,
+) -> list:
+    """
+    Adds the error of the predicted retention time in comparison to the measured retention time to each peptide hit.
+    Different error scores can be selected.
+
+    :param peptide_ids: list of PeptideIdentification objects
+    :type peptide_ids: list
+    :param prediction_dict: dictionary containing the predicted retention time for each peptide sequence
+    :type prediction_dict: dict
+    :param add_abs_rt_error: add absolute RT prediction errors to idXML
+    :type add_abs_rt_error: bool
+    :param add_sqr_rt_error: add squared RT prediction errors to idXML
+    :type add_sqr_rt_error: bool
+    :param add_log_rt_error: add log RT prediction errors to idXML
+    :type add_log_rt_error: bool
+    :return: list of PeptideIdentification objects with added error scores
+    :rtype: list
+    """
+    noncanonical_aa = ["B", "J", "O", "U", "X", "Z"]
+    peptide_hits_noncanonical_aa = {}
+    abs_rt_errors = []
+    sqr_rt_errors = []
+    log_rt_errors = []
+
+    for peptide_id in peptide_ids:
+        # Get measured Retention time
+        measured_rt = peptide_id.getRT()
+        scan_id = peptide_id.getMetaValue("spectrum_reference")
+
+        # Initialize list for edited hits (with added features)
+        new_hits = []
+        for hit in peptide_id.getHits():
+            sequence = hit.getSequence()
+            unmodified_sequence = sequence.toUnmodifiedString()
+            # Catch peptides with noncanonical amino acids and save spectrum reference and hit in dictionary
+            if any(aa in noncanonical_aa for aa in unmodified_sequence):
+                peptide_hits_noncanonical_aa[(peptide_id.getMetaValue("spectrum_reference"), sequence)] = hit
+                continue
+
+            predicted_rt = prediction_dict[(str(sequence), scan_id)]
+
+            # calculate abs error
+            if add_abs_rt_error:
+                abs_error = abs(measured_rt - predicted_rt)
+                hit.setMetaValue("deeplc_abs_error", abs_error)
+                abs_rt_errors.append(abs_error)
+
+            # calculate squared error
+            if add_sqr_rt_error:
+                sqr_error = abs(measured_rt - predicted_rt) ** 2
+                hit.setMetaValue("deeplc_sqr_error", sqr_error)
+                sqr_rt_errors.append(sqr_error)
+
+            # calculate log error
+            if add_log_rt_error:
+                log_error = math.log(abs(measured_rt - predicted_rt))
+                hit.setMetaValue("deeplc_log_error", log_error)
+                log_rt_errors.append(log_error)
+
+            new_hits.append(hit)
+        peptide_id.setHits(new_hits)
+
+    # Add peptides with noncanonical amino acids back to peptide_ids with the median error(s)
+    for scan_id, sequence in peptide_hits_noncanonical_aa.keys():
+        LOG.info(
+            f"Peptide {sequence} hit of spectrum {scan_id} contains noncanonical amino acids. Adding median error(s)"
+        )
+        # get peptide id for scan id
+        peptide_id = [
+            peptide_id for peptide_id in peptide_ids if peptide_id.getMetaValue("spectrum_reference") == scan_id
+        ][0]
+        hit = peptide_hits_noncanonical_aa[(scan_id, sequence)]
+        if add_abs_rt_error:
+            hit.setMetaValue("deeplc_abs_error", np.median(abs_rt_errors))
+        if add_sqr_rt_error:
+            hit.setMetaValue("deeplc_sqr_error", np.median(sqr_rt_errors))
+        if add_log_rt_error:
+            hit.setMetaValue("deeplc_log_error", np.median(log_rt_errors))
+        peptide_id.insertHit(hit)
+
+    return peptide_ids
+
+
+@click.command()
+@click.option("-i", "--input", help="input path of idXML", required=True)
+@click.option("-o", "--output", help="output path of idXML", required=True)
+@click.option(
+    "--calibration_mode",
+    type=click.Choice(["idx_bin", "rt_bin", "min_max"]),
+    default="idx_bin",
+    help="Calibration method",
+)
+@click.option(
+    "--calibration_bins",
+    type=click.IntRange(min=2),
+    default=20,
+    help="number of bins for calibration",
+)
+@click.option(
+    "--add_abs_rt_error",
+    is_flag=True,
+    help="add absolute RT prediction errors to idXML",
+)
+@click.option(
+    "--add_sqr_rt_error",
+    is_flag=True,
+    help="add squared RT prediction errors to idXML (default if nothing is selected)",
+)
+@click.option("--add_log_rt_error", is_flag=True, help="add log RT prediction errors to idXML")
+@click.option(
+    "--debug",
+    is_flag=True,
+    help="Additionally write out calibration file and deeplc output",
+)
+def main(
+    input: str,
+    output: str,
+    calibration_mode: str,
+    calibration_bins: int,
+    add_abs_rt_error: bool,
+    add_sqr_rt_error: bool,
+    add_log_rt_error: bool,
+    debug: bool,
+):
+    # check if at least one error is selected, if not set squared error to true
+    num_true = sum([add_abs_rt_error, add_sqr_rt_error, add_log_rt_error])
+    if num_true == 0:
+        LOG.info("No error calculation was set, falling back to squared error")
+        add_sqr_rt_error = True
+
+    LOG.info("Parse idXML")
+    protein_ids, peptide_ids = parse_idxml(input)
+
+    LOG.info("Generate DeepLC input")
+    df_deeplc_input = generate_deeplc_input(peptide_ids)
+    # Skip sequences with noncanonical amino acids, DeepLC cannot predict them
+    # Add them later with median error
+    df_deeplc_input = df_deeplc_input[~df_deeplc_input["seq"].str.contains("B|J|O|U|X|Z")]
+
+    if len(df_deeplc_input[df_deeplc_input["target_decoy"] != "decoy"]) <= calibration_bins:
+        LOG.info("Number of peptide hits is smaller than calibration bins. Falling back to min/max scaling")
+        calibration_mode = "min_max"
+
+    # Run DeepLC
+    if calibration_mode == "rt_bin":
+        LOG.info("Run DeepLC with RT bin calibration")
+        calibration_df = generate_calibration_df_with_RT_bins(df_deeplc_input, calibration_bins)
+        if debug:
+            calibration_df.to_csv(output + "_calibration.tsv", index=False, sep="\t")
+            df_deeplc_input.to_csv(output + "_deeplc_input.tsv", index=False, sep="\t")
+        df_deeplc_output = run_deeplc(df_deeplc_input, calibration_df)
+
+    elif calibration_mode == "idx_bin":
+        LOG.info("Run DeepLC with index bin calibration")
+        calibration_df = generate_calibration_df(df_deeplc_input, calibration_bins)
+        if debug:
+            calibration_df.to_csv(output + "_calibration.tsv", index=False, sep="\t")
+            df_deeplc_input.to_csv(output + "_deeplc_input.tsv", index=False, sep="\t")
+        df_deeplc_output = run_deeplc(df_deeplc_input, calibration_df)
+
+    elif calibration_mode == "min_max":
+        LOG.info("Run DeepLC with min/max calibration")
+        if debug:
+            df_deeplc_input.to_csv(output + "_deeplc_input.tsv", index=False, sep="\t")
+        df_deeplc_output = run_deeplc(df_deeplc_input)
+
+    if debug:
+        df_deeplc_output.to_csv(output + "_deeplc_output.tsv", index=False, sep="\t")
+
+    # Create map containing the predicted retention time for each peptide sequence and modification
+    sequence_to_prediction = {}
+    for seq_mod, scan_id, pred_rt in zip(
+        df_deeplc_output["seq_with_mods"],
+        df_deeplc_output["scan_id"],
+        df_deeplc_output["predicted_RT"],
+    ):
+        sequence_to_prediction[(seq_mod, scan_id)] = pred_rt
+
+    LOG.info("Add error to idXML")
+    peptide_ids_pred_RT = add_rt_error(
+        peptide_ids,
+        sequence_to_prediction,
+        add_abs_rt_error,
+        add_sqr_rt_error,
+        add_log_rt_error,
+    )
+
+    LOG.info("Write idXML")
+    IdXMLFile().store(output, protein_ids, peptide_ids_pred_RT)
+
+    if debug:
+        df_deeplc_input.to_csv(output + "_deeplc_input.tsv", index=False, sep="\t")
+        if calibration_mode == "rt_bin" or calibration_mode == "idx_bin":
+            calibration_df.to_csv(output + "_calibration.tsv", index=False, sep="\t")
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
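End to end, the script is driven from the command line; a representative call (file names illustrative) would annotate each hit with absolute RT prediction errors using 20 RT-based calibration bins:

    deeplc_cli.py --input run1_filtered.idXML --output run1_rt.idXML --calibration_mode rt_bin --calibration_bins 20 --add_abs_rt_error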
+ "mdfile", + type=argparse.FileType("r"), + nargs="?", + help="File to convert. Defaults to stdin.", + ) + parser.add_argument( + "-o", + "--out", + type=argparse.FileType("w"), + default=sys.stdout, + help="Output file name. Defaults to stdout.", ) return parser.parse_args(args) diff --git a/bin/mhcnuggets_predict_peptides.py b/bin/mhcnuggets_predict_peptides.py index d4ae30ee..724e89b3 100755 --- a/bin/mhcnuggets_predict_peptides.py +++ b/bin/mhcnuggets_predict_peptides.py @@ -176,7 +176,12 @@ def main(): supp_alleles = parse_alleles(args.alleles) for allele in supp_alleles: - predict(class_="II", peptides_path=args.peptides, mhc=allele, output=allele + args.output) + predict( + class_="II", + peptides_path=args.peptides, + mhc=allele, + output=allele + args.output, + ) else: op = open("predicted_neoepitopes_class_2", "w") diff --git a/bin/ms2pip_cli.py b/bin/ms2pip_cli.py new file mode 100755 index 00000000..35df9eda --- /dev/null +++ b/bin/ms2pip_cli.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python +# Written by Jonas Scheid and Steffen Lemke + +import click +import logging +import numpy as np +import pandas as pd +import sys +from ms2pip.ms2pipC import MS2PIP +from pyopenms import IdXMLFile, ModificationsDB + + +# initate logger +console = logging.StreamHandler() +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +console.setFormatter(formatter) +LOG = logging.getLogger("MS2pip prediction") +LOG.addHandler(console) +LOG.setLevel(logging.INFO) + + +def parse_idxml(path: str) -> tuple[list, list]: + """ + Parse idXML file and return PeptideIdentification and ProteinIdentification objects. + + :param path: path to idXML file + :type path: str + :return: ProteinIdentification and PeptideIdentification objects + :rtype: (list, list) + """ + protein_ids = [] + peptide_ids = [] + IdXMLFile().load(path, protein_ids, peptide_ids) + + return protein_ids, peptide_ids + + +def peptide_ids_to_peprec_dataframe(peptide_ids: list, hit_idx: int = 0) -> pd.DataFrame: + """ + All the peptide identifications are parsed into a DataFrame in the style of + a PEPREC file (https://github.com/compomics/ms2pip#peprec-file). 
+
+    :param peptide_ids: List containing PeptideIdentification
+    :type peptide_ids: list
+    :param hit_idx: hit index to generate a peprec
+    :type hit_idx: int
+    :return: PEPREC pandas DataFrame and a dict mapping spectrum reference to peptide sequence
+    :rtype: (pd.DataFrame, dict)
+    """
+
+    columns = ["spec_id", "modifications", "peptide", "charge"]
+    data = []
+    spectrum_reference_to_seq = {}
+
+    for peptide_id in peptide_ids:
+        if len(peptide_id.getHits()) <= hit_idx:
+            continue
+        hit = peptide_id.getHits()[hit_idx]
+        spectrum_reference = peptide_id.getMetaValue("spectrum_reference")
+
+        charge = hit.getCharge()
+        sequence = hit.getSequence()
+        unmodified_sequence = sequence.toUnmodifiedString()
+
+        spectrum_reference_to_seq[spectrum_reference] = str(sequence)
+
+        hit_mods = []
+        for pos in range(0, sequence.size()):
+            residue = sequence.getResidue(pos)
+            if residue.isModified():
+                hit_mods.append("|".join([str(pos + 1), residue.getModificationName()]))
+        if hit_mods == []:
+            modifications = "-"
+        else:
+            modifications = "|".join(hit_mods)
+
+        data.append([spectrum_reference, modifications, unmodified_sequence, charge])
+
+    return pd.DataFrame(data, columns=columns), spectrum_reference_to_seq
+
+
+def get_complete_spectrum_correlation(df_ms2pip_output: pd.DataFrame, method: str) -> pd.DataFrame:
+    """
+    Get correlation coefficient for each predicted spectrum vs the measured one
+
+    :param df_ms2pip_output: pandas dataframe of the ms2pip output with individual ion prediction values
+    :type df_ms2pip_output: pd.DataFrame
+    :param method: correlation method passed to pandas (e.g. "pearson")
+    :type method: str
+    :return: DataFrame with one correlation coefficient (ion_corr) per spectrum (ScanNr)
+    :rtype: pd.DataFrame
+    """
+    scannr_to_total_corr = {}
+    grouped_spec = df_ms2pip_output.groupby("spec_id")
+    correlations_spec = grouped_spec[["prediction", "target"]].corr(method=method)
+
+    for group, corr in correlations_spec.groupby(level=[0, 1]):
+        correlation_value = corr.iloc[0, 1]
+        spec_id = group[0]
+        if group[1] == "prediction":
+            if np.isnan(correlation_value):
+                correlation_value = 0
+            scannr_to_total_corr[spec_id] = correlation_value
+
+    data = {
+        "ScanNr": scannr_to_total_corr.keys(),
+        "ion_corr": scannr_to_total_corr.values(),
+    }
+    df = pd.DataFrame.from_dict(data)
+
+    return df
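The per-spectrum correlation above reduces each spectrum's predicted-vs-observed ion intensities to a single Pearson coefficient, mapping NaN (e.g. from constant predictions) to 0. The same reduction in a compact, self-contained form (toy intensities):

    import pandas as pd

    # Toy MS2PIP output: two spectra, three ion intensities each
    preds = pd.DataFrame({
        "spec_id": ["scan=1"] * 3 + ["scan=2"] * 3,
        "prediction": [0.1, 0.5, 0.9, 0.2, 0.2, 0.2],
        "target": [0.2, 0.4, 0.8, 0.9, 0.1, 0.3],
    })

    ion_corr = (
        preds.groupby("spec_id")[["prediction", "target"]]
        .corr(method="pearson")           # 2x2 correlation matrix per spectrum
        .iloc[0::2, 1]                    # prediction-vs-target coefficient
        .droplevel(1)
        .fillna(0)                        # constant predictions give NaN -> 0
        .rename("ion_corr")
    )
    # scan=2 has zero prediction variance, so its coefficient falls back to 0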
+
+
+def generate_params_config(
+    fixed_modifications: str,
+    variable_modifications: str,
+    model_name: str,
+    fragment_error: float,
+) -> dict:
+    """
+    Generate the MS2PIP configuration file.
+
+    :param fixed_modifications: comma-separated fixed modifications to consider
+    :type fixed_modifications: str
+    :param variable_modifications: comma-separated variable modifications to consider
+    :type variable_modifications: str
+    :param model_name: Name of the model to use
+    :type model_name: str
+    :param fragment_error: Fragment error to use
+    :type fragment_error: float
+    :return: MS2PIP configuration file
+    :rtype: dict
+    """
+    mods = set(fixed_modifications.strip().split(",") + variable_modifications.strip().split(","))
+    # Remove empty strings
+    mods = [mod for mod in mods if mod]
+    params = {
+        "ms2pip": {
+            "ptm": [
+                f"{ModificationsDB().getModification(mod).getId()},{ModificationsDB().getModification(mod).getDiffMonoMass()},opt,{ModificationsDB().getModification(mod).getOrigin()}"
+                for mod in mods
+            ],
+            "model": model_name,
+            "frag_error": fragment_error,
+            "out": "csv",
+            "sptm": [],
+            "gptm": [],
+        }
+    }
+    return params
+
+
+@click.command()
+@click.option("--input_idxml", help="input path of idXML", required=True)
+@click.option("--input_mzml", help="input path of mzML", required=True)
+@click.option("--output_idxml", help="output path of idXML", required=True)
+@click.option(
+    "--num_hits",
+    type=click.IntRange(min=1),
+    default=1,
+    help="number of peptide hits",
+)
+@click.option(
+    "--model_name",
+    type=str,
+    help="Name of MS2pip model (https://github.com/compomics/ms2pip#specialized-prediction-models)",
+)
+@click.option(
+    "--model_path",
+    type=str,
+    help="path to MS2pip model",
+)
+@click.option(
+    "--fragment_error",
+    type=float,
+    help="Fragment mass error in Da",
+)
+@click.option(
+    "--variable_mods",
+    type=str,
+    help="List of variable modifications",
+)
+@click.option(
+    "--fixed_mods",
+    type=str,
+    help="List of fixed modifications",
+)
+@click.option("--add_pearson", is_flag=True, help="add pearson spectrum similarity")
+@click.option(
+    "--num_cpus",
+    type=int,
+    help="number of cpus to use",
+)
+def main(
+    input_idxml: str,
+    input_mzml: str,
+    output_idxml: str,
+    num_hits: int,
+    model_name: str,
+    model_path: str,
+    fragment_error: float,
+    variable_mods: str,
+    fixed_mods: str,
+    add_pearson: bool,
+    num_cpus: int,
+):
+    LOG.info("Parse idXML")
+    protein_ids, peptide_ids = parse_idxml(input_idxml)
+
+    LOG.info("Generate params file for MS2pip")
+    params = generate_params_config(fixed_mods, variable_mods, model_name, fragment_error)
+
+    LOG.info("Make MS2pip predictions")
+    scan_nr_seq_to_corr = {}
+    for hit_idx in range(num_hits):  # number of hits to consider
+        df_peprec, scan_nr_to_seq = peptide_ids_to_peprec_dataframe(peptide_ids, hit_idx)
+        ms2pip = MS2PIP(pep_file=df_peprec, spec_file=input_mzml, params=params, return_results=True, num_cpu=num_cpus)
+        predictions = ms2pip.run()
+        correlation_df = get_complete_spectrum_correlation(predictions, "pearson")
+
+        for scan_nr, ion_corr in zip(correlation_df["ScanNr"], correlation_df["ion_corr"]):
+            sequence = scan_nr_to_seq[scan_nr]
+            scan_nr_seq_to_corr[(scan_nr, sequence)] = ion_corr
+
+    LOG.info("Add correlation scores to peptide identifications")
+    for peptide_id in peptide_ids:
+        spectrum_reference = peptide_id.getMetaValue("spectrum_reference")
+        new_hits = []
+        for hit in peptide_id.getHits():
+            sequence = str(hit.getSequence())
+            if (spectrum_reference, sequence) in scan_nr_seq_to_corr.keys():
+                hit.setMetaValue(
+                    "spectrum_correlation",
+                    scan_nr_seq_to_corr[(spectrum_reference, sequence)],
+                )
+            else:
+                LOG.info(f"No correlation could be computed for {str(sequence)}")
+                hit.setMetaValue("spectrum_correlation", 0)
+
new_hits.append(hit) + peptide_id.setHits(new_hits) + + LOG.info("Write idXML") + IdXMLFile().store(output_idxml, protein_ids, peptide_ids) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/preprocess_neoepitopes_mhcnuggets.py b/bin/preprocess_neoepitopes_mhcnuggets.py index 3c08b32a..d454d1c2 100755 --- a/bin/preprocess_neoepitopes_mhcnuggets.py +++ b/bin/preprocess_neoepitopes_mhcnuggets.py @@ -41,7 +41,10 @@ def main(): model.add_argument("-n", "--neoepitopes", type=str, help="neoepitopes input file") model.add_argument( - "-o", "--output", type=str, help="preprocess neoepitope file for subsequent mhcnuggets prediction" + "-o", + "--output", + type=str, + help="preprocess neoepitope file for subsequent mhcnuggets prediction", ) args = model.parse_args() diff --git a/bin/resolve_neoepitopes.py b/bin/resolve_neoepitopes.py index 5f30ab70..378798c1 100755 --- a/bin/resolve_neoepitopes.py +++ b/bin/resolve_neoepitopes.py @@ -149,7 +149,13 @@ def main(): model.add_argument("-m", "--mztab", type=str, help="Path to mztab file") - model.add_argument("-f", "--file_format", type=str, default="csv", help="File format for output file") + model.add_argument( + "-f", + "--file_format", + type=str, + default="csv", + help="File format for output file", + ) model.add_argument("-o", "--output", type=str, required=True, help="Output file path") diff --git a/bin/variants2fasta.py b/bin/variants2fasta.py index b1b900da..308795a3 100755 --- a/bin/variants2fasta.py +++ b/bin/variants2fasta.py @@ -161,7 +161,11 @@ def main(): model.add_argument("-f", "--fasta_ref", type=str, default=None, help="Path to the fasta input file") model.add_argument( - "-p", "--proteins", type=str, default=None, help="Path to the protein ID input file (in HGNC-ID)" + "-p", + "--proteins", + type=str, + default=None, + help="Path to the protein ID input file (in HGNC-ID)", ) model.add_argument( @@ -173,7 +177,10 @@ def main(): ) model.add_argument( - "-fINDEL", "--filterINDEL", action="store_true", help="Filter insertions and deletions (including frameshifts)" + "-fINDEL", + "--filterINDEL", + action="store_true", + help="Filter insertions and deletions (including frameshifts)", ) model.add_argument("-fFS", "--filterFSINDEL", action="store_true", help="Filter frameshift INDELs") @@ -215,12 +222,20 @@ def main(): if args.filterINDEL: variants = filter( lambda x: x.type - not in [VariationType.INS, VariationType.DEL, VariationType.FSDEL, VariationType.FSINS], + not in [ + VariationType.INS, + VariationType.DEL, + VariationType.FSDEL, + VariationType.FSINS, + ], variants, ) if args.filterFSINDEL: - variants = filter(lambda x: x.type not in [VariationType.FSDEL, VariationType.FSINS], variants) + variants = filter( + lambda x: x.type not in [VariationType.FSDEL, VariationType.FSINS], + variants, + ) if not variants: sys.stderr.write("No variants left after filtering. 
Please refine your filtering criteria.\n") diff --git a/bin/vcf_neoepitope_predictor.py b/bin/vcf_neoepitope_predictor.py index b4227d23..95c08733 100755 --- a/bin/vcf_neoepitope_predictor.py +++ b/bin/vcf_neoepitope_predictor.py @@ -224,19 +224,35 @@ def main(): ) model.add_argument( - "-p", "--proteins", type=str, default=None, help="Path to the protein ID input file (in HGNC-ID)" + "-p", + "--proteins", + type=str, + default=None, + help="Path to the protein ID input file (in HGNC-ID)", ) model.add_argument( - "-minl", "--peptide_min_length", type=int, default=8, help="Minimum peptide length for epitope prediction" + "-minl", + "--peptide_min_length", + type=int, + default=8, + help="Minimum peptide length for epitope prediction", ) model.add_argument( - "-maxl", "--peptide_max_length", type=int, default=12, help="Maximum peptide length for epitope prediction" + "-maxl", + "--peptide_max_length", + type=int, + default=12, + help="Maximum peptide length for epitope prediction", ) model.add_argument( - "-a", "--alleles", type=str, required=True, help="Path to the allele file (one per line in new nomenclature)" + "-a", + "--alleles", + type=str, + required=True, + help="Path to the allele file (one per line in new nomenclature)", ) model.add_argument( @@ -248,7 +264,10 @@ def main(): ) model.add_argument( - "-fINDEL", "--filterINDEL", action="store_true", help="Filter insertions and deletions (including frameshifts)" + "-fINDEL", + "--filterINDEL", + action="store_true", + help="Filter insertions and deletions (including frameshifts)", ) model.add_argument("-fFS", "--filterFSINDEL", action="store_true", help="Filter frameshift INDELs") @@ -294,12 +313,20 @@ def main(): if args.filterINDEL: variants = filter( lambda x: x.type - not in [VariationType.INS, VariationType.DEL, VariationType.FSDEL, VariationType.FSINS], + not in [ + VariationType.INS, + VariationType.DEL, + VariationType.FSDEL, + VariationType.FSINS, + ], variants, ) if args.filterFSINDEL: - variants = filter(lambda x: x.type not in [VariationType.FSDEL, VariationType.FSINS], variants) + variants = filter( + lambda x: x.type not in [VariationType.FSDEL, VariationType.FSINS], + variants, + ) if not variants: sys.stderr.write("No variants left after filtering. 
Please refine your filtering criteria.\n") @@ -340,7 +367,11 @@ def main(): if protein_seq is not None: transcript_to_genes[ensembl_ids[EAdapterFields.TRANSID]] = l.strip() proteins.append( - Protein(protein_seq, gene_id=l.strip(), transcript_id=ensembl_ids[EAdapterFields.TRANSID]) + Protein( + protein_seq, + gene_id=l.strip(), + transcript_id=ensembl_ids[EAdapterFields.TRANSID], + ) ) epitopes = [] for length in range(args.peptide_min_length, args.peptide_max_length): diff --git a/bin/vcf_reader.py b/bin/vcf_reader.py index 743ab03b..9ad879fa 100755 --- a/bin/vcf_reader.py +++ b/bin/vcf_reader.py @@ -237,7 +237,15 @@ def read_vcf(filename, pass_only=True): if coding: pos, reference, alternative = get_fred2_annotation(vt, p, r, str(alt)) var = Variant( - "line" + str(num), vt, c, pos, reference, alternative, coding, isHomozygous, isSynonymous + "line" + str(num), + vt, + c, + pos, + reference, + alternative, + coding, + isHomozygous, + isSynonymous, ) var.gene = gene var.log_metadata("vardbid", variation_dbid) @@ -254,7 +262,17 @@ def read_vcf(filename, pass_only=True): for tId, vs in transToVar.iteritems(): if len(vs) > 10: for v in vs: - vs_new = Variant(v.id, v.type, v.chrom, v.genomePos, v.ref, v.obs, v.coding, True, v.isSynonymous) + vs_new = Variant( + v.id, + v.type, + v.chrom, + v.genomePos, + v.ref, + v.obs, + v.coding, + True, + v.isSynonymous, + ) vs_new.gene = v.gene for m in ["vardbid"]: vs_new.log_metadata(m, v.get_metadata(m)) diff --git a/conf/base.config b/conf/base.config index 197923f3..4bdccc70 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,7 +14,7 @@ process { memory = { check_max( 4.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -56,4 +56,9 @@ process { withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } + withName:TDF2MZML { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 10.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } } diff --git a/conf/igenomes.config b/conf/igenomes.config new file mode 100644 index 00000000..3f114377 --- /dev/null +++ b/conf/igenomes.config @@ -0,0 +1,440 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines reference genomes using iGenome paths. 
+ Can be used by any config that customises the base path using: + $params.igenomes_base / --igenomes_base +---------------------------------------------------------------------------------------- +*/ + +params { + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + 
star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + mito_name = "Mt" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + mito_name = "MT" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + mito_name = "MT" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + mito_name = "MT" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + mito_name = "MT" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + mito_name = "MT" + } + 'Rnor_5.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.21e7" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + mito_name = "MT" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "9e7" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + mito_name = "chrM" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.37e9" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + mito_name = "chrM" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + mito_name = "chrM" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + mito_name = "chrM" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 
'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.2e7" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + mito_name = "chrM" + } + } +} diff --git a/conf/modules.config b/conf/modules.config index a53e6553..62f0f4e1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -13,148 +13,216 @@ process { publishDir = [ - path: {"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}"}, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + withName: SAMPLESHEET_CHECK { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, - saveAs: {filename -> filename.equals('versions.yml') ? null : filename}, - enabled: false + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + } - withName: 'SAMPLESHEET_CHECK' { - publishDir = [ - path: {"${params.outdir}/pipeline_info"}, - mode: params.publish_dir_mode - ] - } - - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { - publishDir = [ - path: {"${params.outdir}/pipeline_info"}, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } - withName: 'MULTIQC' { - publishDir = [ - path: {"${params.outdir}/multiqc"}, - mode: params.publish_dir_mode, - enabled: true - ] - } + withName: 'MULTIQC' { + ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } process { + withName: 'THERMORAWFILEPARSER' { + publishDir = [ + path: {"${params.outdir}"}, + mode: params.publish_dir_mode, + enabled: false + ] + } + + withName: 'TDF2MZML' { + publishDir = [ + enabled: false + ] + } + withName: 'GENERATE_PROTEINS_FROM_VCF' { - ext.args = [ - "-t ${params.variant_annotation_style}", - "-r ${params.variant_reference}", - params.variant_indel_filter ? "-fINDEL" : "", - params.variant_frameshift_filter ? 
"-fFS" : "", - params.variant_snp_filter ? "-fSNP" : "" - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}"}, - mode: params.publish_dir_mode, - pattern: '*.fasta' - ] - } + ext.args = [ + "-t ${params.variant_annotation_style}", + "-r ${params.variant_reference}", + params.variant_indel_filter ? "-fINDEL" : "", + params.variant_frameshift_filter ? "-fFS" : "", + params.variant_snp_filter ? "-fSNP" : "" + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}"}, + mode: params.publish_dir_mode, + pattern: '*.fasta' + ] + } + + withName: 'OPENMS_DECOYDATABASE' { + publishDir = [ + enabled: false + ] + } + + withName: 'OPENMS_FILEFILTER' { + publishDir = [ + enabled: false + ] + } withName: 'OPENMS_MAPALIGNERIDENTIFICATION' { - ext.args = [ - "-model:type linear", - "-algorithm:max_rt_shift ${params.max_rt_alignment_shift}" - ].join(' ').trim() - } + ext.args = [ + "-model:type linear", + "-algorithm:max_rt_shift ${params.max_rt_alignment_shift}" + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/alignment"}, + mode: params.publish_dir_mode, + pattern: '*.trafoXML' + ] + } + + withName: 'OPENMS_MAPRTTRANSFORMERMZML|OPENMS_MAPRTTRANSFORMERIDXML' { + publishDir = [ + enabled: false + ] + } + + withName: 'OPENMS_IDMERGER*' { + publishDir = [ + enabled: false + ] + } withName: 'OPENMS_COMETADAPTER' { - ext.args = [ - "-precursor_mass_tolerance ${params.precursor_mass_tolerance}", - "-fragment_mass_tolerance ${params.fragment_mass_tolerance}", - "-fragment_bin_offset ${params.fragment_bin_offset}", - "-instrument ${params.instrument}", - "-num_hits ${params.num_hits}", - "-digest_mass_range ${params.digest_mass_range}", - "-max_variable_mods_in_peptide ${params.number_mods}", - "-missed_cleavages 0", - "-precursor_charge ${params.prec_charge}", - "-activation_method ${params.activation_method}", - "-variable_modifications ${params.variable_mods.tokenize(',').collect {"'${it}'"}.join(" ")}", - "-enzyme '${params.enzyme}'", - "-spectrum_batch_size ${params.spectrum_batch_size}" - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/comet"}, - mode: params.publish_dir_mode, - pattern: '*.tsv' - ] - } + ext.args = [ + "-precursor_mass_tolerance ${params.precursor_mass_tolerance}", + "-fragment_mass_tolerance ${params.fragment_mass_tolerance}", + "-fragment_bin_offset ${params.fragment_bin_offset}", + "-instrument ${params.instrument}", + "-num_hits ${params.num_hits}", + "-digest_mass_range ${params.digest_mass_range}", + "-max_variable_mods_in_peptide ${params.number_mods}", + "-missed_cleavages 0", + "-precursor_charge ${params.prec_charge}", + "-activation_method ${params.activation_method}", + "-variable_modifications ${params.variable_mods.tokenize(',').collect {"'${it}'"}.join(" ")}", + "-enzyme '${params.enzyme}'", + "-spectrum_batch_size ${params.spectrum_batch_size}" + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/comet"}, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ] + } - withName: 'OPENMS_IDFILTER_FOR_ALIGNMENT' { - ext.args = [ - "-remove_decoys", - "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", - "-delete_unreferenced_peptide_hits", - (params.fdr_threshold == '0.01') ? 
"-score:pep 0.05" : "-score:pep " + params.fdr_threshold - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/alignment"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } + withName: 'OPENMS_PEPTIDEINDEXER' { + publishDir = [ + enabled: false + ] + } withName: 'OPENMS_IDFILTER_Q_VALUE' { - ext.prefix = {"${meta.id}_fdr_filtered"} - ext.args = [ - "-remove_decoys", - "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", - "-delete_unreferenced_peptide_hits", - (params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/alignment"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } + ext.prefix = {"${meta.id}_pout_filtered"} + ext.args = [ + "-remove_decoys", + "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", + "-delete_unreferenced_peptide_hits", + (params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/percolator"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] + } withName: 'OPENMS_PERCOLATORADAPTER' { - ext.prefix = {"${meta.id}_all_ids_merged_psm_perc"} - ext.args = [ - "-seed 4711", - "-trainFDR 0.05", - "-testFDR 0.05", - "-enzyme no_enzyme", - "-subset_max_train ${params.subset_max_train}", - "-doc ${params.description_correct_features} ", - (params.fdr_level != 'psm_level_fdrs') ? "-" + params.fdr_level : "" - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/percolator"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } + ext.args = [ + "-seed 4711", + "-trainFDR 0.05", + "-testFDR 0.05", + "-enzyme no_enzyme", + "-subset_max_train ${params.subset_max_train}", + "-doc ${params.description_correct_features} ", + "-post_processing_tdc", + (params.fdr_level != 'psm_level_fdrs') ? 
"-" + params.fdr_level : "" + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/percolator"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] + } withName: 'OPENMS_PSMFEATUREEXTRACTOR' { - publishDir = [ - path: {"${params.outdir}/intermediate_results/features"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } + publishDir = [ + path: {"${params.outdir}/intermediate_results/percolator"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] + } + + withName: 'OPENMS_MZTABEXPORTER' { + ext.prefix = {"${meta.id}"} + publishDir = [ + path: {"${params.outdir}"}, + mode: params.publish_dir_mode, + pattern: '*.mzTab' + ] + } + + withName: 'OPENMS_IDRIPPER' { + publishDir = [ + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] + } + + withName: 'OPENMS_IDSCORESWITCHER' { + publishDir = [ + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] + } - withName: 'OPENMS_MZTABEXPORTER_QUANT' { - publishDir = [ - path: {"${params.outdir}/intermediate_results/features"}, - mode: params.publish_dir_mode, - pattern: '*.mzTab' - ] + withName: 'PYOPENMS_IDFILTER' { + publishDir = [ + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] } withName: 'OPENMS_FEATUREFINDERIDENTIFICATION' { + ext.args = [ + "-extract:mz_window ${params.quantification_mz_window}", + "-extract:rt_window ${params.quantification_rt_window}", + "-detect:mapping_tolerance ${params.quantification_mapping_tolerance}", + "-detect:peak_width ${params.quantification_peak_width}", + "-detect:min_peak_width ${params.quantification_min_peak_width}" + ].join(' ').trim() publishDir = [ path: {"${params.outdir}/intermediate_results/features"}, mode: params.publish_dir_mode, @@ -162,29 +230,27 @@ process { ] } - withName: 'OPENMS_TEXTEXPORTER_UNQUANTIFIED|OPENMS_TEXTEXPORTER_QUANTIFIED' { - publishDir = [ - path: {"${params.outdir}/"}, - mode: params.publish_dir_mode, - pattern: '*.tsv' - ] - } + withName: 'OPENMS_FEATURELINKERUNLABELEDKD' { + publishDir = [ + enabled: false + ] + } - withName: 'OPENMS_TEXTEXPORTER_COMET' { - ext.prefix = {"${meta.sample}_${meta.condition}_${meta.id}"} - publishDir = [ - path: {"${params.outdir}/intermediate_results/comet"}, - mode: params.publish_dir_mode, - pattern: '*.tsv' - ] - } + withName: 'OPENMS_TEXTEXPORTER' { + publishDir = [ + path: {"${params.outdir}"}, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ] + } withName: 'OPENMS_IDCONFLICTRESOLVER' { - publishDir = [ - path: {"${params.outdir}/intermediate_results/features"}, - mode: params.publish_dir_mode, - pattern: '*.consensusXML' - ] + publishDir = [ + path: {"${params.outdir}/intermediate_results/features"}, + mode: params.publish_dir_mode, + pattern: '*.consensusXML', + enabled: false + ] } } @@ -384,42 +450,11 @@ process { } } -process { - - if (params.predict_RT) { - withName: 'OPENMS_RTMODEL' { - publishDir = [ - path: {"${params.outdir}/RT_prediction"}, - mode: params.publish_dir_mode, - pattern: '*.txt|*.paramXML' - ] - } - - withName: 'OPENMS_RTPREDICT_FOUND_PEPTIDES' { - ext.prefix = {"${meta.sample}_id_files_for_rt_prediction_RTpredicted"} - publishDir = [ - path: {"${params.outdir}/RT_prediction"}, - mode: params.publish_dir_mode, - pattern: '*.csv' - ] - } - - withName: 'OPENMS_RTPREDICT_NEOEPITOPES' { - ext.prefix = {"${meta.sample}_txt_file_for_rt_prediction_RTpredicted"} - publishDir = [ - path: {"${params.outdir}/RT_prediction"}, - mode: params.publish_dir_mode, - pattern: '*.csv' - ] - } - } -} process 
{ if (params.annotate_ions) { withName: 'PYOPENMS_IONANNOTATOR' { - ext.prefix = {"${meta.sample}"} ext.args = [ "--precursor_charge ${params.prec_charge}", "--fragment_mass_tolerance ${params.fragment_mass_tolerance}", @@ -430,6 +465,34 @@ process { mode: params.publish_dir_mode, pattern: '*.tsv' ] - } } + } +} + + +process { + if (params.use_deeplc) { + withName: 'DEEPLC' { + publishDir = [ + path: {"${params.outdir}/DeepLC"}, + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] + } + } +} + + +process { + if (params.use_ms2pip) { + withName: 'MS2PIP' { + publishDir = [ + path: {"${params.outdir}/MS2PIP"}, + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] + } + } } diff --git a/conf/test.config b/conf/test.config index b3f4b221..029cfbde 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,5 +22,7 @@ params { // Input data fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv' - allele_sheet = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_allele_sheet.tsv' + + // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) + skip_quantification = true } diff --git a/conf/test_deeplc.config b/conf/test_deeplc.config new file mode 100644 index 00000000..dff885b5 --- /dev/null +++ b/conf/test_deeplc.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests with DeepLC +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/mhcquant -profile test_deeplc, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test DeepLC profile' + config_profile_description = 'Minimal test dataset to check pipeline function with DeepLC' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv' + + // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) + skip_quantification = true + use_deeplc = true + deeplc_add_abs_rt_error = true + deeplc_add_sqr_rt_error = true + deeplc_add_log_rt_error = true +} diff --git a/conf/test_full.config b/conf/test_full.config index 1ba617ee..17eef6c3 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -14,10 +14,8 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - predict_class_1 = true - // Input data fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/sample_sheet_full.tsv' - allele_sheet = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/allele_sheet_full.tsv' + } diff --git a/conf/test_ionannotator.config b/conf/test_ionannotator.config new file mode 100644 index 00000000..0de44b10 --- /dev/null +++ b/conf/test_ionannotator.config @@ -0,0 +1,29 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running ion annotator tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/mhcquant -profile test_ionannotator, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test ion annotator profile' + config_profile_description = 'Minimal test dataset to check pipeline function with ion annotator' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv' + + // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) + skip_quantification = true + annotate_ions = true +} diff --git a/conf/test_ms2pip.config b/conf/test_ms2pip.config new file mode 100644 index 00000000..6343b683 --- /dev/null +++ b/conf/test_ms2pip.config @@ -0,0 +1,30 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests with MS2PIP +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/mhcquant -profile test_ms2pip, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test MS2PIP profile' + config_profile_description = 'Minimal test dataset to check pipeline function with MS2PIP' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv' + + // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) + skip_quantification = true + use_ms2pip = true + ms2pip_model_name = 'Immuno-HCD' +} diff --git a/docs/images/mhcquant_subway.png b/docs/images/mhcquant_subway.png new file mode 100644 index 00000000..2de45154 Binary files /dev/null and b/docs/images/mhcquant_subway.png differ diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png deleted file mode 100755 index 361d0e47..00000000 Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100755 index cb39ebb8..00000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100755 index a4b89bf5..00000000 Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/output.md b/docs/output.md index 1e74ea39..112aa819 100644 --- a/docs/output.md +++ b/docs/output.md @@ -8,21 +8,27 @@ The directories listed below will be created in the results directory after the ## General -### Quantification - -
    Output files -- `*.tsv` : If `--skip_quantification` is not specified. +- `*.mzTab` +- `*.tsv` -
+The mzTab output file follows a [HUPO-PSI format]() and combines all information of the sample-condition group extracted from a database search throughout the pipeline. The respective entries are explained in detail [here](https://psidev.info/sites/default/files/2017-07/R2_The_ten_minute_guide_to_mzTab.pdf). MzTab files are compatible with the PRIDE Archive - proteomics data repository and can be uploaded as search files. -The CSV output file is a table containing all information extracted from a database search throughout the pipeline. See the [OpenMS](https://www.openms.de/) or PSI documentation for more information about [annotated scores and format](https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_TextExporter.html). +MzTab files contain many columns; a few of the most important ones are highlighted here: + +```bash +PEP sequence accession best_search_engine_score[1] retention_time charge mass_to_charge peptide_abundance_study_variable[1] +``` + +Most importantly, in this format the Comet XCorr of each peptide identification is annotated in the `best_search_engine_score[1]` column and the peptide quantities in the `peptide_abundance_study_variable` columns. If `--skip_quantification` is specified, the `best_search_engine_score[1]` column holds the Percolator q-value instead. + +The TSV output file is an alternative output of [OpenMS](https://www.openms.de/) comprising similar information to the mzTab output. A brief explanation of its structure is given below. See the format documentation or the PSI documentation for more information about [annotated scores and format](https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_TextExporter.html). MAP contains information about the different mzML files that were provided initially ```bash -#MAP id filename label size +#MAP id filename label size ``` RUN contains information about the search that was performed on each run @@ -57,44 +63,44 @@ PEPTIDE contains information about peptide hits that were identified and corresp ### Intermediate results -
    +
+ +This folder contains the intermediate results from various steps of the MHCquant pipeline (e.g. (un)filtered PSMs, aligned mzMLs, features); a short note on publishing these intermediate outputs follows the list below. + Output files - `intermediate_results/` - - `alignment` - - `*filtered.idXML` : If `--skip_quantification` is not specified, then this file is generated in the `OPENMS_IDFILTER_Q_VALUE` - - `{ID}_-_{filename}_filtered` : An outcome file of `OPENMS_IDFILTER_FOR_ALIGNMENT`, this file is only generated when `--skip_quantification` is not specified - - `comet` - - `{raw filename}.tsv` : The outcome of `CometAdapter` containing more detailed information about all of the hits that have been found (no filtering has been applied) - - `{Sample}_{Condition}_{ID}.tsv` : Single files that hold information about the peptides sequences that have been identified (no filtering has been applied) - - `features` - - `*.mztab` : mztab file generated by the OpenMS MzTabExporter command which is present in the `PROCESS_FEATURE` step - - `*.idXML` : Outcome of `PSMFEATUREEXTRACTOR`, containing the computations of extra features for each input PSM - - `*.featureXML` : These files file is generated by the OpenMS `FeatureFinderIdentification` command - - `ion_annotations` - - `{Sample}_{Condition}_all_peaks.tsv`: Contains metadata of all measured ions of peptides reported after `OPENMS_IDFILTER_Q_VALUE`. - - `{Sample}_{Condition}_matching_ions.tsv`: Contains ion annotations and additional metadata of peptides reported after `OPENMS_IDFILTER_Q_VALUE`. + + - `alignment`: Contains the `trafoXML` files of each run that document the retention time shift after alignment in quantification mode. + + - `comet`: Contains pin files generated by Comet after the database search - `percolator`
+ - `{Sample}_{Condition}_psm.idXML`: File holding extra features that will be used by Percolator. Created by [PSMFeatureExtractor](https://openms.de/doxygen/release/3.0.0/html/UTILS_PSMFeatureExtractor.html). + - `{Sample}_{Condition}_pout.idXML`: Unfiltered Percolator output. + - `{Sample}_{Condition}_pout_filtered.idXML`: FDR-filtered Percolator output. + + - `features`: Holds information on quantified features in `featureXML` files as a result of the [FeatureFinderIdentification](https://openms.de/doxygen/release/3.0.0/html/TOPP_FeatureFinderIdentification.html) in quantification mode. + +- `ion_annotations` + + - `{Sample}_{Condition}_all_peaks.tsv`: Contains metadata of all measured ions of peptides reported after peptide identification. + + - `{Sample}_{Condition}_matching_ions.tsv`: Contains ion annotations and additional metadata of peptides reported after peptide identification. + +- `refined_fdr` (Only if `--refine_fdr_on_predicted_subset` is specified) + + - `*merged_psm_perc_filtered.mzTab` : This file exports the q-value-filtered Percolator results as mzTab. + + - `*_all_ids_merged.mzTab` : Exports all of the PSM results as mzTab. -Most important to know is that in this format we annotated the q-value of each peptide identification in the `best_search_engine_score[1]` column and peptide quantities in the peptide_abundance_study_variable` columns. -[mzTab](http://www.psidev.info/mztab) is a light-weight format to report mass spectrometry search results. It provides all important information about identified peptide hits and is compatible with the PRIDE Archive - proteomics data repository. + - `*perc_subset.idXML` : This file is the outcome of a second OpenMS `PercolatorAdapter` run. + + - `*pred_filtered.idXML` : Contains PSM prediction results filtered by the shrunken search space (MHCflurry outcome). + + - `{ID}_-_{filename}_filtered` : An outcome file of `OPENMS_IDFILTER_REFINED`. +
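Note that several of the intermediate outputs listed above are not published by default; their `publishDir` blocks in `conf/modules.config` are set to `enabled: false`. As a minimal sketch of how publishing can be re-enabled for a single run - the `OPENMS_PEPTIDEINDEXER` selector is taken from `conf/modules.config` in this PR, while the output sub-directory is purely illustrative - a small custom config can be passed via `-c`:

```nextflow
// custom.config - re-enable publishing for one intermediate result.
// OPENMS_PEPTIDEINDEXER has publishing disabled by default in conf/modules.config;
// this process selector overrides its publishDir settings for a single run.
process {
    withName: 'OPENMS_PEPTIDEINDEXER' {
        publishDir = [
            path: { "${params.outdir}/intermediate_results/peptide_indexer" }, // illustrative path
            mode: params.publish_dir_mode,
            pattern: '*.idXML'
        ]
    }
}
```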
    ## VCF @@ -118,8 +124,11 @@ These CSV files list all of the theoretically possible neoepitope sequences from Output files - `class_1_bindings/` + - `*found_neoepitopes_class1.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_1` are specified + - `class_2_bindings/` + - `*found_neoepitopes_class2.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_2` are specified @@ -137,9 +146,12 @@ peptide sequence geneID Output files - `class_1_bindings/` - - `*vcf_neoepitopes_class1.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_1` are specified + +- `*vcf_neoepitopes_class1.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_1` are specified + - `class_2_bindings/` - - `*vcf_neoepitopes_class2.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_2` are specified + +- `*vcf_neoepitopes_class2.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_2` are specified @@ -158,9 +170,12 @@ Sequence Antigen ID Variants Output files - `class_1_bindings/` - - `*predicted_peptides_class_1.csv`: If `--predict_class_1` is specified, then this CSV is generated + +- `*predicted_peptides_class_1.csv`: If `--predict_class_1` is specified, then this CSV is generated + - `class_2_bindings/` - - `*predicted_peptides_class_2.csv`: If `--predict_class_2` is specified, then this CSV is generated + +- `*predicted_peptides_class_2.csv`: If `--predict_class_2` is specified, then this CSV is generated @@ -171,26 +186,18 @@ The prediction outputs are comma-separated table (CSV) for each allele, listing peptide allele prediction prediction_low prediction_high prediction_percentile ``` -## Retention time prediction +### MultiQC
    Output files -- `RT_prediction` - - `*id_RTpredicted.csv`: If `--predict_RT` is specified, the retention time found peptides are provided - - `*txt_RTpredicted.csv`: If `--predict_RT` is specified, the retention time predicted neoepitopes are provided +- `multiqc/` -
    +- `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. -### MultiQC +- `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. -
    -Output files - -- `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. +- `multiqc_plots/`: directory containing static images from the report in various formats.
@@ -204,9 +211,11 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ Output files - `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.html`. - Reports generated by the pipeline: `software_versions.yml`. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. diff --git a/docs/usage.md b/docs/usage.md index e5b76cdd..e454a066 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -27,7 +27,7 @@ ID Sample Condition ReplicateFileName ### Full samplesheet -The pipeline will auto-detect whether a sample is either a mzML or raw files using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 4 columns to match those defined in the table below. +The pipeline will auto-detect whether a sample is in mzML, raw, or Bruker's tdf file format using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 4 columns to match those defined in the table below. A final samplesheet file consisting of both single- and paired-end data may look something like the one below. @@ -53,7 +53,7 @@ ID Sample Condition ReplicateFileName | `ID` | An incrementing value which acts as a unique number for the given sample | | `Sample` | Custom sample name. This entry will be identical for multiple MS runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | | `Condition` | Additional information of the sample can be defined here. | -| `ReplicateFileName` | Full path to the MS outcome file. These files have the extentions ".raw" or ".mzML" | +| `ReplicateFileName` | Full path to the MS outcome file. These files have the extensions ".raw", ".mzML" or ".d" | An [example samplesheet](../assets/samplesheet.tsv) has been provided with the pipeline. @@ -62,7 +62,7 @@ An [example samplesheet](../assets/samplesheet.tsv) has been provided with the p The typical command for running the pipeline is as follows: ```console -nextflow run nf-core/mhcquant --input 'samples.tsv' --outdir --fasta 'SWISSPROT_2020.fasta' --allele_sheet 'alleles.tsv' --vcf_sheet 'variants.tsv' --include_proteins_from_vcf --predict_class_1 --spectrum_batch_size 500 -profile docker +nextflow run nf-core/mhcquant --input 'samples.tsv' --outdir --fasta 'SWISSPROT_2020.fasta' --use_deeplc --use_ms2pip -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -76,6 +76,31 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). 
+::: + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/mhcquant -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -92,17 +117,27 @@ First, go to the [nf-core/mhcquant releases page](https://github.com/nf-core/mhc This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +:::tip +If you wish to share such a profile (e.g. to upload as supplementary material for academic publications), make sure NOT to include cluster-specific paths to files or institution-specific profiles. +::: + ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). 
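As a concrete illustration of the legitimate resource-tuning use of `-c` mentioned in the warning above - a minimal sketch, assuming the process names defined in `conf/modules.config` of this PR; the resource values are purely illustrative - a custom config can raise the requests of a single process:

```nextflow
// custom.config - tune the resources of one process without touching the pipeline code.
// OPENMS_COMETADAPTER is the Comet database-search step configured in conf/modules.config;
// the values below are examples only, not recommendations.
process {
    withName: 'OPENMS_COMETADAPTER' {
        cpus   = 8
        memory = 16.GB
    }
}
```

Such a file is then supplied alongside the usual profiles, e.g. `nextflow run nf-core/mhcquant -profile docker -c custom.config ...`.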
@@ -124,8 +159,10 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` @@ -141,102 +178,19 @@ You can also supply a run name to resume a specific run: `-resume [run-name]`. U Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: - -```console -[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' - -Caused by: - Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) - -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - - -Command exit status: - 137 - -Command output: - (empty) - -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. -Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb - -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -#### For beginners - -A first step to bypass this error, you could try to increase the amount of CPUs, memory, and time for the whole pipeline. Therefor you can try to increase the resource for the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you have to increase the amount of memory. Therefore you can go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button to get the default value for `--max_memory`. In this case 128GB, you than can try to run your pipeline again with `--max_memory 200GB -resume` to skip all process, that were already calculated. 
If you can not increase the resource of the complete pipeline, you can try to adapt the resource for a single process as mentioned below. - -#### Advanced option on process level - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). -The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` - -> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. - -### Updating containers (advanced users) - -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. - -1. 
Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: - - - For Docker: +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +### Custom Containers - - For Singularity: +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline-specified version may be out of date. - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. - - For Conda: +### Custom Tool Arguments - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` +A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. -> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. ### nf-core/configs diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 33cd4f6e..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,528 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
-// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 25a0a74a..01b8653d 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -3,6 +3,7 @@ // import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput class NfcoreTemplate { @@ -128,7 +129,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) @@ -222,6 +223,21 @@ class NfcoreTemplate { } } + // + // Dump pipeline parameters in a json file + // + public static void dump_parameters(workflow, params) { + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def output_pf = new File(output_d, "params_${timestamp}.json") + def jsonStr = JsonOutput.toJson(params) + output_pf.text = JsonOutput.prettyPrint(jsonStr) + } + // // Print pipeline summary on completion // diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index f232749a..f0ab505a 100644 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the main.nf workflow in the nf-core/mhcquant pipeline // +import nextflow.Nextflow + class WorkflowMain { // @@ -19,40 +21,11 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Generate help string - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input 'samples.tsv' --fasta 'SWISSPROT_2020.fasta' --allele_sheet 'alleles.tsv' --vcf_sheet 'variants.tsv' --include_proteins_from_vcf --predict_class_1 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } // Print workflow version and exit on --version if (params.version) { @@ -61,14 +34,6 @@ class WorkflowMain { System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) diff --git a/lib/WorkflowMhcquant.groovy b/lib/WorkflowMhcquant.groovy index 05bc9acf..a99924b3 100644 --- a/lib/WorkflowMhcquant.groovy +++ b/lib/WorkflowMhcquant.groovy @@ -2,6 +2,7 @@ // This file holds several functions specific to the workflow/mhcquant.nf in the nf-core/mhcquant pipeline // +import nextflow.Nextflow import groovy.text.SimpleTemplateEngine
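A note on the `dump_parameters` helper added to `lib/NfcoreTemplate.groovy` above: the diff defines the function but its call site is not shown in this hunk. Below is a minimal Groovy sketch of how it might be invoked, assuming a `workflow.onComplete` handler in `main.nf`; the placement is illustrative only and not part of this diff.

    // Sketch (assumed wiring, not shown in this diff): persist the resolved run
    // parameters to ${params.outdir}/pipeline_info/params_<timestamp>.json on completion.
    workflow.onComplete {
        NfcoreTemplate.dump_parameters(workflow, params)
    }
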
class WorkflowMhcquant { @@ -138,15 +139,57 @@ class WorkflowMhcquant { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>" + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() @@ -154,4 +197,18 @@ class WorkflowMhcquant { return description_html } + + // + // Exit pipeline if incorrect --genome key provided + // + private static void genomeExistsError(params, log) { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.error(error_string) + } + } } diff --git a/main.nf b/main.nf index 9e273de6..ec9081bf 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,6 @@ nf-core/mhcquant ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/mhcquant - Website: https://nf-co.re/mhcquant Slack : https://nfcore.slack.com/channels/mhcquant ---------------------------------------------------------------------------------------- @@ -18,6 +17,22 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + WorkflowMain.initialise(workflow, params, log) /* diff --git a/modules.json b/modules.json index aa86fecc..f9875084 100644 --- a/modules.json +++ b/modules.json @@ -7,12 +7,12 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] } } diff --git a/modules/local/deeplc.nf b/modules/local/deeplc.nf new file mode 100644 index 00000000..5f7ea37d --- /dev/null +++ b/modules/local/deeplc.nf @@ -0,0 +1,41 @@ +process DEEPLC { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::deeplc=2.2.0 bioconda::pyopenms=2.9.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-beb85d5ee68ba9251d26079ca28797d51ea3c49a:857e5e7908422b6ea5016a3c313f67087fbe2f8b-0' : + 'biocontainers/mulled-v2-beb85d5ee68ba9251d26079ca28797d51ea3c49a:857e5e7908422b6ea5016a3c313f67087fbe2f8b-0' }" + + input: + tuple val(meta), path(idxml_in) + + output: + tuple val(meta), path('*.idXML'), emit: idxml + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = idxml_in.baseName + def add_abs_rt_error = params.deeplc_add_abs_rt_error ? "--add_abs_rt_error" : "" + def add_sqr_rt_error = params.deeplc_add_sqr_rt_error ? "--add_sqr_rt_error" : "" + def add_log_rt_error = params.deeplc_add_log_rt_error ? "--add_log_rt_error" : "" + + """ + deeplc_cli.py \\ + --input $idxml_in \\ + --output ${prefix}_deeplc.idXML \\ + --calibration_mode ${params.deeplc_calibration_mode} \\ + --calibration_bins ${params.deeplc_calibration_bins} \\ + $add_abs_rt_error \\ + $add_sqr_rt_error \\ + $add_log_rt_error + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + DeepLC: \$(deeplc --version) + END_VERSIONS + """ +} diff --git a/modules/local/generate_proteins_from_vcf.nf b/modules/local/generate_proteins_from_vcf.nf index dc04f818..d91e5384 100644 --- a/modules/local/generate_proteins_from_vcf.nf +++ b/modules/local/generate_proteins_from_vcf.nf @@ -2,10 +2,10 @@ process GENERATE_PROTEINS_FROM_VCF { tag "$meta" label 'process_medium' - conda (params.enable_conda ? "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' : - 'quay.io/biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" + 'biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" input: tuple val(meta), path(fasta), path(vcf) diff --git a/modules/local/mhcflurry_predictneoepitopesclass1.nf b/modules/local/mhcflurry_predictneoepitopesclass1.nf index 071b0bba..9c84ea9f 100644 --- a/modules/local/mhcflurry_predictneoepitopesclass1.nf +++ b/modules/local/mhcflurry_predictneoepitopesclass1.nf @@ -1,11 +1,11 @@ process MHCFLURRY_PREDICTNEOEPITOPESCLASS1 { - tag "$meta" + tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' : - 'quay.io/biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" + 'biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" input: tuple val(meta), val(allotypes), path(neoepitopes) @@ -18,7 +18,7 @@ process MHCFLURRY_PREDICTNEOEPITOPESCLASS1 { task.ext.when == null || task.ext.when script: - def prefix = task.ext.suffix ?: "${neoepitopes}_${meta}_predicted_neoepitopes_class_1" + def prefix = task.ext.suffix ?: "${neoepitopes}_${meta.id}_predicted_neoepitopes_class_1" """ mhcflurry-downloads --quiet fetch models_class1 diff --git a/modules/local/mhcflurry_predictpeptidesclass1.nf b/modules/local/mhcflurry_predictpeptidesclass1.nf index efd670cf..d828542a 100644 --- a/modules/local/mhcflurry_predictpeptidesclass1.nf +++ b/modules/local/mhcflurry_predictpeptidesclass1.nf @@ -1,11 +1,11 @@ process MHCFLURRY_PREDICTPEPTIDESCLASS1 { - tag "$meta" + tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' : - 'quay.io/biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" + 'biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" input: tuple val(meta), path(mztab), val(alleles) diff --git a/modules/local/mhcflurry_predictpsms.nf b/modules/local/mhcflurry_predictpsms.nf index b66d2889..f5e92fc1 100644 --- a/modules/local/mhcflurry_predictpsms.nf +++ b/modules/local/mhcflurry_predictpsms.nf @@ -1,11 +1,11 @@ process MHCFLURRY_PREDICTPSMS { - tag "$meta" + tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' : - 'quay.io/biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" + 'biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" input: tuple val(meta), path(perc_mztab), path(psm_mztab), val(allotypes) diff --git a/modules/local/mhcnuggets_neoepitopesclass2post.nf b/modules/local/mhcnuggets_neoepitopesclass2post.nf index a97f6a2c..721b6cfe 100644 --- a/modules/local/mhcnuggets_neoepitopesclass2post.nf +++ b/modules/local/mhcnuggets_neoepitopesclass2post.nf @@ -1,11 +1,11 @@ process MHCNUGGETS_NEOEPITOPESCLASS2POST { - tag "$meta" + tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mhcnuggets:2.3.2--py_0' : - 'quay.io/biocontainers/mhcnuggets:2.3.2--py_0' }" + 'biocontainers/mhcnuggets:2.3.2--py_0' }" input: tuple val(meta), path(neoepitopes), path(predicted) diff --git a/modules/local/mhcnuggets_neoepitopesclass2pre.nf b/modules/local/mhcnuggets_neoepitopesclass2pre.nf index 8d03799a..fd68fec4 100644 --- a/modules/local/mhcnuggets_neoepitopesclass2pre.nf +++ b/modules/local/mhcnuggets_neoepitopesclass2pre.nf @@ -1,11 +1,11 @@ process MHCNUGGETS_NEOEPITOPESCLASS2PRE { - tag "$meta" + tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mhcnuggets:2.3.2--py_0' : - 'quay.io/biocontainers/mhcnuggets:2.3.2--py_0' }" + 'biocontainers/mhcnuggets:2.3.2--py_0' }" input: tuple val(meta), path(neoepitopes) @@ -18,7 +18,7 @@ process MHCNUGGETS_NEOEPITOPESCLASS2PRE { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta}_mhcnuggets_preprocessed" + def prefix = task.ext.prefix ?: "${meta.id}_mhcnuggets_preprocessed" """ preprocess_neoepitopes_mhcnuggets.py \\ diff --git a/modules/local/mhcnuggets_peptidesclass2post.nf b/modules/local/mhcnuggets_peptidesclass2post.nf index f35c7698..fd114f36 100644 --- a/modules/local/mhcnuggets_peptidesclass2post.nf +++ b/modules/local/mhcnuggets_peptidesclass2post.nf @@ -1,11 +1,11 @@ process MHCNUGGETS_PEPTIDESCLASS2POST { - tag "$meta" + tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mhcnuggets:2.3.2--py_0' : - 'quay.io/biocontainers/mhcnuggets:2.3.2--py_0' }" + 'biocontainers/mhcnuggets:2.3.2--py_0' }" input: tuple val(meta), path(peptides), path(peptide_to_geneID) @@ -18,7 +18,7 @@ process MHCNUGGETS_PEPTIDESCLASS2POST { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_postprocessed" + def prefix = task.ext.prefix ?: "${meta.id}_postprocessed" """ postprocess_peptides_mhcnuggets.py --input $peptides \\ diff --git a/modules/local/mhcnuggets_peptidesclass2pre.nf b/modules/local/mhcnuggets_peptidesclass2pre.nf index 56906804..a3b140aa 100644 --- a/modules/local/mhcnuggets_peptidesclass2pre.nf +++ b/modules/local/mhcnuggets_peptidesclass2pre.nf @@ -1,11 +1,11 @@ process MHCNUGGETS_PEPTIDESCLASS2PRE { - tag "$meta" + tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mhcnuggets:2.3.2--py_0' : - 'quay.io/biocontainers/mhcnuggets:2.3.2--py_0' }" + 'biocontainers/mhcnuggets:2.3.2--py_0' }" input: tuple val(meta), path(mztab) @@ -19,7 +19,7 @@ process MHCNUGGETS_PEPTIDESCLASS2PRE { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_preprocessed_mhcnuggets_peptides" + def prefix = task.ext.prefix ?: "${meta.id}_preprocessed_mhcnuggets_peptides" """ preprocess_peptides_mhcnuggets.py --mztab $mztab \\ diff --git a/modules/local/mhcnuggets_predictneoepitopesclass2.nf b/modules/local/mhcnuggets_predictneoepitopesclass2.nf index 7f586643..d2c35eef 100644 --- a/modules/local/mhcnuggets_predictneoepitopesclass2.nf +++ b/modules/local/mhcnuggets_predictneoepitopesclass2.nf @@ -1,11 +1,11 @@ process MHCNUGGETS_PREDICTNEOEPITOPESCLASS2 { - tag "$meta" + tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' : - 'quay.io/biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" + 'biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" input: tuple val(meta), path(neoepitopes), val(alleles) @@ -18,7 +18,7 @@ process MHCNUGGETS_PREDICTNEOEPITOPESCLASS2 { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta}_predicted_neoepitopes_class_2" + def prefix = task.ext.prefix ?: "${meta.id}_predicted_neoepitopes_class_2" """ mhcnuggets_predict_peptides.py --peptides $neoepitopes \\ diff --git a/modules/local/mhcnuggets_predictpeptidesclass2.nf b/modules/local/mhcnuggets_predictpeptidesclass2.nf index 5fb5fc5f..d275e7c2 100644 --- a/modules/local/mhcnuggets_predictpeptidesclass2.nf +++ b/modules/local/mhcnuggets_predictpeptidesclass2.nf @@ -1,11 +1,11 @@ process MHCNUGGETS_PREDICTPEPTIDESCLASS2 { - tag "$meta" + tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' : - 'quay.io/biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" + 'biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" input: tuple val(meta), path(peptides), val(alleles) @@ -18,7 +18,7 @@ process MHCNUGGETS_PREDICTPEPTIDESCLASS2 { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_predicted_peptides_class_2" + def prefix = task.ext.prefix ?: "${meta.id}_predicted_peptides_class_2" """ mhcnuggets_predict_peptides.py --peptides $peptides \\ diff --git a/modules/local/ms2pip.nf b/modules/local/ms2pip.nf new file mode 100644 index 00000000..c66cb357 --- /dev/null +++ b/modules/local/ms2pip.nf @@ -0,0 +1,41 @@ +process MS2PIP { + tag "$meta.id" + label 'process_low' + + conda "bioconda::ms2pip=3.11.0 bioconda::pyopenms=2.9.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-beb85d5ee68ba9251d26079ca28797d51ea3c49a:857e5e7908422b6ea5016a3c313f67087fbe2f8b-0' : + 'biocontainers/mulled-v2-beb85d5ee68ba9251d26079ca28797d51ea3c49a:857e5e7908422b6ea5016a3c313f67087fbe2f8b-0' }" + + input: + tuple val(meta), path(idxml_in), path(mzml) + + output: + tuple val(meta), path('*.idXML'), emit: idxml + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = idxml_in.baseName + def fragment_error = params.fragment_mass_tolerance * 2 + + """ + ms2pip_cli.py \\ + --input_idxml $idxml_in \\ + --input_mzml $mzml \\ + --output_idxml ${prefix}_ms2pip.idXML \\ + --num_hits ${params.num_hits} \\ + --model_name ${params.ms2pip_model_name} \\ + --fragment_error $fragment_error \\ + --variable_mods '${params.variable_mods}' \\ + --fixed_mods '${params.fixed_mods}' \\ + --num_cpus ${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MS2PIP: \$(conda list | grep "ms2pip" | awk 'NR==2 {print \$2}') + END_VERSIONS + """ +} diff --git a/modules/local/openms_cometadapter.nf b/modules/local/openms_cometadapter.nf index e74dcb0f..d3f000d0 100644 --- a/modules/local/openms_cometadapter.nf +++ b/modules/local/openms_cometadapter.nf @@ -2,10 +2,10 @@ process OPENMS_COMETADAPTER { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null) + conda "bioconda::openms-thirdparty=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_2' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_2' }" + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:3.0.0--h9ee0642_1' : + 'biocontainers/openms-thirdparty:3.0.0--h9ee0642_1' }" input: tuple val(meta), path(mzml), path(fasta) @@ -23,6 +23,7 @@ process OPENMS_COMETADAPTER { def args = task.ext.args ?: '' def mods = params.fixed_mods != " " ? "-fixed_modifications ${params.fixed_mods.tokenize(',').collect { "'${it}'"}.join(" ")}" : "-fixed_modifications" + def params_file = params.default_params_file_comet != " " ? "-default_params_file ${params.default_params_file_comet}" : "" def xions = params.use_x_ions ? "-use_X_ions true" : "" def zions = params.use_z_ions ? 
"-use_Z_ions true" : "" def aions = params.use_a_ions ? "-use_A_ions true" : "" @@ -35,7 +36,8 @@ process OPENMS_COMETADAPTER { -out ${prefix}.idXML \\ -database $fasta \\ -threads $task.cpus \\ - -pin_out ${prefix}.tsv \\ + -pin_out ${prefix}_pin.tsv \\ + $params_file \\ $args \\ $mods \\ $xions \\ diff --git a/modules/local/openms_decoydatabase.nf b/modules/local/openms_decoydatabase.nf index 6811e71d..6994057f 100644 --- a/modules/local/openms_decoydatabase.nf +++ b/modules/local/openms_decoydatabase.nf @@ -1,11 +1,11 @@ process OPENMS_DECOYDATABASE { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(fasta) diff --git a/modules/local/openms_falsediscoveryrate.nf b/modules/local/openms_falsediscoveryrate.nf index 84957efd..048dda8b 100644 --- a/modules/local/openms_falsediscoveryrate.nf +++ b/modules/local/openms_falsediscoveryrate.nf @@ -1,11 +1,11 @@ process OPENMS_FALSEDISCOVERYRATE { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(idxml) diff --git a/modules/local/openms_featurefinderidentification.nf b/modules/local/openms_featurefinderidentification.nf index df4c7472..bbdfb26d 100644 --- a/modules/local/openms_featurefinderidentification.nf +++ b/modules/local/openms_featurefinderidentification.nf @@ -2,13 +2,13 @@ process OPENMS_FEATUREFINDERIDENTIFICATION { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(id_quant_int), path(mzml), path(id_quant) + tuple val(meta), path(mzml), path(id_int), path(id_ext) output: tuple val(meta), path("*.featureXML"), emit: featurexml @@ -18,14 +18,16 @@ process OPENMS_FEATUREFINDERIDENTIFICATION { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_${meta.id}" - def arguments = params.quantification_fdr ? "-id $id_quant_int -id_ext $id_quant -svm:min_prob ${params.quantification_min_prob}" : "-id $id_quant" + def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}" + def args = task.ext.args ?: '' + def quant_fdr = params.quantification_fdr ? 
"-id $id_int -id_ext $id_ext -svm:min_prob ${params.quantification_min_prob}" : "-id $id_ext" + args = args + " $quant_fdr" """ FeatureFinderIdentification -in $mzml \\ -out ${prefix}.featureXML \\ -threads $task.cpus \\ - ${arguments} + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/openms_featurelinkerunlabeledkd.nf b/modules/local/openms_featurelinkerunlabeledkd.nf index 15cb724a..2765836a 100644 --- a/modules/local/openms_featurelinkerunlabeledkd.nf +++ b/modules/local/openms_featurelinkerunlabeledkd.nf @@ -1,11 +1,11 @@ process OPENMS_FEATURELINKERUNLABELEDKD { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null) + conda "bioconda::openms-thirdparty=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_2' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_2' }" + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:3.0.0--h9ee0642_1' : + 'biocontainers/openms-thirdparty:3.0.0--h9ee0642_1' }" input: tuple val(meta), path(features) diff --git a/modules/local/openms_filefilter.nf b/modules/local/openms_filefilter.nf new file mode 100644 index 00000000..e526e959 --- /dev/null +++ b/modules/local/openms_filefilter.nf @@ -0,0 +1,33 @@ +process OPENMS_FILEFILTER { + tag "$meta.id" + label 'process_low' + + conda "bioconda::openms=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" + + input: + tuple val(meta), path(mzml) + + output: + tuple val(meta), path("*.mzML"), emit: cleaned_mzml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}" + """ + FileFilter -in $mzml \\ + -out ${prefix}.mzML \\ + -peak_options:rm_pc_charge 0 \\ + -threads $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/openms_idconflictresolver.nf b/modules/local/openms_idconflictresolver.nf index 35fb067d..7a1f795f 100644 --- a/modules/local/openms_idconflictresolver.nf +++ b/modules/local/openms_idconflictresolver.nf @@ -1,11 +1,11 @@ process OPENMS_IDCONFLICTRESOLVER { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(consensus) diff --git a/modules/local/openms_idfilter.nf b/modules/local/openms_idfilter.nf index 34af2352..fb946789 100644 --- a/modules/local/openms_idfilter.nf +++ b/modules/local/openms_idfilter.nf @@ -1,14 +1,14 @@ process OPENMS_IDFILTER { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? 
"bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(idxml), file(peptide_filter) + tuple val(meta), path(idxml), val(peptide_filter) output: tuple val(meta), path("*.idXML"), emit: idxml @@ -18,20 +18,18 @@ process OPENMS_IDFILTER { task.ext.when == null || task.ext.when script: - def whitelist = "$peptide_filter" - def prefix = task.ext.prefix ?: "${meta.id}_-_${idxml.baseName}_filtered" + def prefix = task.ext.prefix ?: "${meta.id}_filtered" def args = task.ext.args ?: '' - if (whitelist == "input.2") { - whitelist = " " + if (peptide_filter != null) { + args += "-whitelist:peptides $peptide_filter" } """ IDFilter -in $idxml \\ -out ${prefix}.idXML \\ -threads $task.cpus \\ - $args \\ - $whitelist + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/openms_idmerger.nf b/modules/local/openms_idmerger.nf index dc2ebf22..740ecfcf 100644 --- a/modules/local/openms_idmerger.nf +++ b/modules/local/openms_idmerger.nf @@ -1,14 +1,14 @@ process OPENMS_IDMERGER { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(aligned) + tuple val(meta), path(idxmls) output: tuple val(meta), path("*.idXML"), emit: idxml @@ -18,10 +18,10 @@ process OPENMS_IDMERGER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_${meta.condition}_all_ids_merged" + def prefix = task.ext.prefix ?: "${meta.id}" """ - IDMerger -in $aligned \\ + IDMerger -in $idxmls \\ -out ${prefix}.idXML \\ -threads $task.cpus \\ -annotate_file_origin true \\ diff --git a/modules/local/openms_idripper.nf b/modules/local/openms_idripper.nf new file mode 100644 index 00000000..64d3631c --- /dev/null +++ b/modules/local/openms_idripper.nf @@ -0,0 +1,34 @@ +process OPENMS_IDRIPPER { + tag "${meta.id}" + label 'process_single' + + conda "bioconda::openms=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" + + input: + tuple val(meta), path(merged_idxml) + + output: + tuple val(meta), path("*.idXML"), emit: ripped + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + IDRipper -in $merged_idxml \\ + -out . 
\\ + -threads $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/openms_idscoreswitcher.nf b/modules/local/openms_idscoreswitcher.nf new file mode 100644 index 00000000..1df324f0 --- /dev/null +++ b/modules/local/openms_idscoreswitcher.nf @@ -0,0 +1,38 @@ +process OPENMS_IDSCORESWITCHER { + tag "$meta.id" + label 'process_single' + + conda "bioconda::openms=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" + + input: + tuple val(meta), path(idxml), path(whitelist) + + output: + tuple val(meta), path("*.idXML"), path(whitelist), emit: switched_idxml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_switched" + def args = task.ext.args ?: '' + + """ + IDScoreSwitcher -in $idxml \\ + -out ${prefix}.idXML \\ + -threads $task.cpus \\ + -new_score 'COMET:xcorr' \\ + -new_score_orientation 'higher_better' \\ + -old_score 'q-value' \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/openms_mapaligneridentification.nf b/modules/local/openms_mapaligneridentification.nf index b8929028..550f59d2 100644 --- a/modules/local/openms_mapaligneridentification.nf +++ b/modules/local/openms_mapaligneridentification.nf @@ -1,14 +1,14 @@ process OPENMS_MAPALIGNERIDENTIFICATION { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(idxml) + tuple val(meta), path(idxmls) output: tuple val(meta), path("*.trafoXML"), emit: trafoxml @@ -18,11 +18,11 @@ process OPENMS_MAPALIGNERIDENTIFICATION { task.ext.when == null || task.ext.when script: - def out_names = idxml.collect { it.baseName+'.trafoXML' }.join(' ') + def out_names = idxmls.collect { it.baseName+'.trafoXML' }.join(' ') def args = task.ext.args ?: '' """ - MapAlignerIdentification -in $idxml \\ + MapAlignerIdentification -in $idxmls \\ -trafo_out ${out_names} \\ $args diff --git a/modules/local/openms_maprttransformer.nf b/modules/local/openms_maprttransformer.nf index 459e293c..afe6d007 100644 --- a/modules/local/openms_maprttransformer.nf +++ b/modules/local/openms_maprttransformer.nf @@ -1,11 +1,11 @@ process OPENMS_MAPRTTRANSFORMER { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(alignment_file), path(trafoxml) @@ -18,7 +18,7 @@ process OPENMS_MAPRTTRANSFORMER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.id}_aligned" + def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_aligned" def fileExt = alignment_file.collect { it.name.tokenize("\\.")[1] }.join(' ') """ diff --git a/modules/local/openms_mztabexporter.nf b/modules/local/openms_mztabexporter.nf index f45262bd..16056675 100644 --- a/modules/local/openms_mztabexporter.nf +++ b/modules/local/openms_mztabexporter.nf @@ -1,14 +1,14 @@ process OPENMS_MZTABEXPORTER { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(mztab) + tuple val(meta), path(in_file) output: tuple val(meta), path("*.mzTab"), emit: mztab @@ -18,11 +18,11 @@ process OPENMS_MZTABEXPORTER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_${meta.condition}" + def prefix = task.ext.prefix ?: "${meta.id}" def args = task.ext.args ?: '' """ - MzTabExporter -in $mztab \\ + MzTabExporter -in $in_file \\ -out ${prefix}.mzTab \\ -threads $task.cpus \\ $args diff --git a/modules/local/openms_peakpickerhires.nf b/modules/local/openms_peakpickerhires.nf index a788149a..e8ca0afa 100644 --- a/modules/local/openms_peakpickerhires.nf +++ b/modules/local/openms_peakpickerhires.nf @@ -2,10 +2,10 @@ process OPENMS_PEAKPICKERHIRES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(mzml) diff --git a/modules/local/openms_peptideindexer.nf b/modules/local/openms_peptideindexer.nf index 2cd44915..48a21cd6 100644 --- a/modules/local/openms_peptideindexer.nf +++ b/modules/local/openms_peptideindexer.nf @@ -1,11 +1,11 @@ process OPENMS_PEPTIDEINDEXER { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(idxml), path(fasta) @@ -18,7 +18,7 @@ process OPENMS_PEPTIDEINDEXER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${idxml.baseName}_-_idx" + def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_idx" """ PeptideIndexer -in $idxml \\ diff --git a/modules/local/openms_percolatoradapter.nf b/modules/local/openms_percolatoradapter.nf index b477832e..b45e41cc 100644 --- a/modules/local/openms_percolatoradapter.nf +++ b/modules/local/openms_percolatoradapter.nf @@ -1,14 +1,14 @@ process OPENMS_PERCOLATORADAPTER { tag "$meta.id" - label 'process_high' + label 'process_low' - conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null) + conda "bioconda::openms-thirdparty=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_2' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_2' }" + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:3.0.0--h9ee0642_1' : + 'biocontainers/openms-thirdparty:3.0.0--h9ee0642_1' }" input: - tuple val(meta), path(psm) + tuple val(meta), path(merged_with_features) output: tuple val(meta), path("*.idXML"), emit: idxml @@ -18,13 +18,13 @@ process OPENMS_PERCOLATORADAPTER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}_pout" def args = task.ext.args ?: '' def klammer = (params.description_correct_features > 0 && params.klammer) ? "-klammer" : "" """ OMP_NUM_THREADS=$task.cpus \\ - PercolatorAdapter -in $psm \\ + PercolatorAdapter -in $merged_with_features \\ -out ${prefix}.idXML \\ $klammer \\ $args diff --git a/modules/local/openms_psmfeatureextractor.nf b/modules/local/openms_psmfeatureextractor.nf index 052d76bc..7ec54ce7 100644 --- a/modules/local/openms_psmfeatureextractor.nf +++ b/modules/local/openms_psmfeatureextractor.nf @@ -2,13 +2,13 @@ process OPENMS_PSMFEATUREEXTRACTOR { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(merged) + tuple val(meta), path(idxml) output: tuple val(meta), path("*.idXML"), emit: idxml @@ -18,13 +18,32 @@ process OPENMS_PSMFEATUREEXTRACTOR { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${merged.baseName}_psm" + def prefix = task.ext.prefix ?: "${meta.id}_psm" def args = task.ext.args ?: '' + def extra_features = "" + if(params.use_deeplc || params.use_ms2pip){ + extra_features = "-extra" + } + if(params.use_deeplc){ + if(params.deeplc_add_abs_rt_error){ + extra_features = "${extra_features} deeplc_abs_error" + } + if(params.deeplc_add_log_rt_error){ + extra_features = "${extra_features} deeplc_log_error" + } + if(params.deeplc_add_sqr_rt_error || (!params.deeplc_add_sqr_rt_error && !params.deeplc_add_abs_rt_error && !params.deeplc_add_log_rt_error)){ + extra_features = "${extra_features} deeplc_sqr_error" + } + } + if(params.use_ms2pip){ + extra_features = "${extra_features} spectrum_correlation" + } """ - PSMFeatureExtractor -in $merged \\ + PSMFeatureExtractor -in $idxml \\ -out ${prefix}.idXML \\ -threads $task.cpus \\ + $extra_features \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/openms_rtmodel.nf b/modules/local/openms_rtmodel.nf deleted file mode 100644 index dc224e7f..00000000 --- a/modules/local/openms_rtmodel.nf +++ /dev/null @@ -1,35 +0,0 @@ -process OPENMS_RTMODEL { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" - - input: - tuple val(meta), path(rt_training) - - output: - tuple val(meta), path("*_rt_training.txt"), path("*.paramXML"), path("*_trainset.txt"), emit: complete - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.sample}" - - """ - RTModel -in $rt_training \\ - -cv:skip_cv \\ - -out ${prefix}_rt_training.txt \\ - -out_oligo_params ${prefix}_params.paramXML \\ - -out_oligo_trainset ${prefix}_trainset.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/openms_rtpredict.nf b/modules/local/openms_rtpredict.nf deleted file mode 100644 index 9e174670..00000000 --- a/modules/local/openms_rtpredict.nf +++ /dev/null @@ -1,35 +0,0 @@ -process OPENMS_RTPREDICT { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_2' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_2' }" - - input: - tuple val(meta), path(idxml), path(rt_model), path(rt_params), path(trainset) - - output: - tuple val(meta), path("*.csv"), emit: csv - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.sample}_RTpredicted" - - """ - RTPredict -in_id $idxml \\ - -svm_model $rt_model \\ - -in_oligo_params $rt_params \\ - -in_oligo_trainset $trainset \\ - -out_text:file ${prefix}.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms-thirdparty: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/openms_textexporter.nf b/modules/local/openms_textexporter.nf index c63d1977..fc16d59e 100644 --- a/modules/local/openms_textexporter.nf +++ b/modules/local/openms_textexporter.nf @@ -1,14 +1,14 @@ process OPENMS_TEXTEXPORTER { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : - 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'quay.io/biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(consensus_resolved) + tuple val(meta), path(file) output: tuple val(meta), path("*.tsv"), emit: tsv @@ -22,7 +22,7 @@ process OPENMS_TEXTEXPORTER { def args = task.ext.args ?: '' """ - TextExporter -in $consensus_resolved \\ + TextExporter -in $file \\ -out ${prefix}.tsv \\ -threads $task.cpus \\ -id:add_hit_metavalues 0 \\ diff --git a/modules/local/predict_possible_neoepitopes.nf b/modules/local/predict_possible_neoepitopes.nf index 48b21cd3..c469b469 100644 --- a/modules/local/predict_possible_neoepitopes.nf +++ b/modules/local/predict_possible_neoepitopes.nf @@ -2,10 +2,10 @@ process PREDICT_POSSIBLE_NEOEPITOPES { tag "$meta" label 'process_low' - conda (params.enable_conda ? "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' : - 'quay.io/biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" + 'biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" input: tuple val(meta), val(alleles), path(vcf) diff --git a/modules/local/pyopenms_idfilter.nf b/modules/local/pyopenms_idfilter.nf new file mode 100644 index 00000000..c4fb4698 --- /dev/null +++ b/modules/local/pyopenms_idfilter.nf @@ -0,0 +1,34 @@ +process PYOPENMS_IDFILTER { + tag "$meta.id" + label 'process_single' + + conda "bioconda::pyopenms=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pyopenms:3.0.0--py311h9b8898c_0' : + 'biocontainers/pyopenms:3.0.0--py311h9b8898c_0' }" + + input: + tuple val(meta), path(idxml), path(whitelist) + + output: + tuple val(meta), path("*_fdr_filtered.idXML") , emit: filtered + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_fdr_filtered" + + """ + IDFilter.py \\ + --input $idxml \\ + --whitelist $whitelist \\ + --output ${prefix}.idXML + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pyopenms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/pyopenms_ionannotator.nf b/modules/local/pyopenms_ionannotator.nf index 1972e91c..d2f4964e 100644 --- a/modules/local/pyopenms_ionannotator.nf +++ b/modules/local/pyopenms_ionannotator.nf @@ -1,24 +1,24 @@ process PYOPENMS_IONANNOTATOR { - tag "$sample" + tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::pyopenms=2.8.0" : null) + conda "bioconda::pyopenms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pyopenms:2.8.0--py310h3dc0cdb_1' : - 'quay.io/biocontainers/pyopenms:2.8.0--py310h3dc0cdb_1' }" + 'https://depot.galaxyproject.org/singularity/pyopenms:3.0.0--py311h9b8898c_0' : + 'biocontainers/pyopenms:3.0.0--py311h9b8898c_0' }" input: - tuple val(sample), path(mzml), path(fdr_filtered_idxml) + tuple val(meta), path(mzml), path(fdr_filtered_idxml) output: - tuple val(sample), path("*.tsv"), path("*.tsv"), emit: tsv + tuple val(meta), path("*.tsv") , emit: tsv path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${mzml.baseName}" + def prefix = task.ext.prefix ?: "${meta.id}" def args = task.ext.args ?: '' def xions = params.use_x_ions ? "-use_x_ions" : "" @@ -30,7 +30,7 @@ process PYOPENMS_IONANNOTATOR { get_ion_annotations.py \\ --input $mzml \\ -idxml $fdr_filtered_idxml \\ - --prefix $sample \\ + --prefix $meta.id \\ $args \\ $xions \\ $zions \\ diff --git a/modules/local/resolve_found_neoepitopes.nf b/modules/local/resolve_found_neoepitopes.nf index 647552d5..4dce9107 100644 --- a/modules/local/resolve_found_neoepitopes.nf +++ b/modules/local/resolve_found_neoepitopes.nf @@ -2,10 +2,10 @@ process RESOLVE_FOUND_NEOEPITOPES { tag "$meta" label 'process_low' - conda (params.enable_conda ? "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" : null) + conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' : - 'quay.io/biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" + 'biocontainers/mulled-v2-c3f301504f7fa2e7bf81c3783de19a9990ea3001:12b1b9f040fd92a80629d58f8a558dde4820eb15-0' }" input: tuple val(meta), path(mztab), path(neoepitopes) diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index aa75ed96..1f350859 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -5,7 +5,7 @@ process SAMPLESHEET_CHECK { conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: path samplesheet diff --git a/modules/local/tdf2mzml.nf b/modules/local/tdf2mzml.nf new file mode 100644 index 00000000..13ea49fc --- /dev/null +++ b/modules/local/tdf2mzml.nf @@ -0,0 +1,26 @@ +process TDF2MZML { + tag "$meta.id" + + container "docker.io/mfreitas/tdf2mzml" + + input: + tuple val(meta), path(tdf) + + output: + tuple val(meta), path("*.mzML"), emit: mzml + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${tdf.simpleName}" + + """ + tdf2mzml.py -i $tdf -o ${prefix}.mzML + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version | cut -d ' ' -f2) + tdf2mzml: \$(echo 0.3.0) + END_VERSIONS + """ +} diff --git a/modules/local/openms_thermorawfileparser.nf b/modules/local/thermorawfileparser.nf similarity index 76% rename from modules/local/openms_thermorawfileparser.nf rename to modules/local/thermorawfileparser.nf index 65c1e7eb..8385a33c 100644 --- a/modules/local/openms_thermorawfileparser.nf +++ b/modules/local/thermorawfileparser.nf @@ -1,11 +1,11 @@ -process OPENMS_THERMORAWFILEPARSER { +process THERMORAWFILEPARSER { tag "$meta.id" - label 'process_medium' + label 'process_low' - conda (params.enable_conda ? "bioconda::thermorawfileparser=1.4.0" : null) + conda "bioconda::thermorawfileparser=1.4.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.4.0--ha8f3691_0' : - 'quay.io/biocontainers/thermorawfileparser:1.4.0--ha8f3691_0' }" + 'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.4.2--ha8f3691_0' : + 'biocontainers/thermorawfileparser:1.4.2--ha8f3691_0' }" input: tuple val(meta), path(rawfile) @@ -25,7 +25,8 @@ process OPENMS_THERMORAWFILEPARSER { ThermoRawFileParser.sh \\ -i $rawfile \\ -f 2 \\ - -b ${prefix}.mzML + -o . + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 3df21765..ebc87273 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.13" + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 60b546a0..c32657de 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,7 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: - custom + - dump - version tools: - custom: diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 68f66bea..1fc387be 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.13" + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index ebc29b27..f93b5ee5 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: MultiQC description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: @@ -37,7 +38,7 @@ output: description: MultiQC report file pattern: "multiqc_report.html" - data: - type: dir + type: directory description: MultiQC data dir pattern: "multiqc_data" - plots: diff --git a/nextflow.config b/nextflow.config index 34011c95..7bcca384 100644 --- a/nextflow.config +++ b/nextflow.config @@ -34,8 +34,9 @@ params { fdr_level = 'peptide_level_fdrs' fixed_mods = ' ' fragment_bin_offset = 0.0 - fragment_mass_tolerance = 0.02 + fragment_mass_tolerance = 0.01 instrument = 'high_res' + default_params_file_comet = ' ' klammer = false max_rt_alignment_shift = 300 number_mods = 3 @@ -43,11 +44,15 @@ params { peptide_min_length = 8 peptide_max_length = 12 pick_ms_levels = 2 - predict_RT = false prec_charge = '2:3' precursor_mass_tolerance = 5 quantification_fdr = null quantification_min_prob = 0 + quantification_mz_window = 5 + quantification_rt_window = 0 + quantification_peak_width = 60 + quantification_min_peak_width = 0.2 + quantification_mapping_tolerance= 0 refine_fdr_on_predicted_subset = false remove_precursor_peak = false run_centroidisation = false @@ -62,6 +67,19 @@ params { variable_mods = 'Oxidation (M)' vcf_sheet = null annotate_ions = false + filter_mzml = false + + // DeepLC settings + use_deeplc = false + deeplc_calibration_mode = 'idx_bin' + deeplc_calibration_bins = 20 + deeplc_add_abs_rt_error = false + deeplc_add_sqr_rt_error = false + deeplc_add_log_rt_error = false + + // MS2PIP settings + use_ms2pip = false + ms2pip_model_name = 'Immuno-HCD' // MultiQC options 
skip_multiqc = false @@ -72,7 +90,6 @@ params { multiqc_methods_description = null // Boilerplate options - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -81,25 +98,29 @@ params { hook_url = null help = false version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - // Config options - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null - config_profile_contact = null - config_profile_url = null - config_profile_name = null + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Max resource options // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -112,15 +133,19 @@ try { System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } -// Load nf-core/mhcquant custom config -try { - includeConfig "${params.custom_config_base}/pipeline/mhcquant.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/mhcquant profiles: ${params.custom_config_base}/pipeline/mhcquant.config") -} - +// Load nf-core/mhcquant custom profiles from different institutions. +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! 
+// try { +// includeConfig "${params.custom_config_base}/pipeline/mhcquant.config" +// } catch (Exception e) { +// System.err.println("WARNING: Could not load nf-core/config/mhcquant profiles: ${params.custom_config_base}/pipeline/mhcquant.config") +// } profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + } conda { conda.enabled = true docker.enabled = false @@ -128,6 +153,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } mamba { conda.enabled = true @@ -137,14 +163,17 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } docker { docker.enabled = true docker.userEmulation = true + conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } arm { docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' @@ -152,44 +181,76 @@ profiles { singularity { singularity.enabled = true singularity.autoMounts = true + conda.enabled = false docker.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } podman { podman.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } shifter { shifter.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false charliecloud.enabled = false + apptainer.enabled = false } charliecloud { charliecloud.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } gitpod { executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.cpus = 4 + executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_deeplc { includeConfig 'conf/test_deeplc.config' } + test_ms2pip { includeConfig 'conf/test_ms2pip.config' } + test_ionannotator { includeConfig 'conf/test_ionannotator.config' } + test_full { includeConfig 'conf/test_full.config' } } -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +// Nextflow plugins +plugins { + id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. 
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -206,32 +267,35 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { name = 'nf-core/mhcquant' - author = """Leon Bichmann, Marissa Dubbelaar""" + author = """Leon Bichmann, Marissa Dubbelaar, Jonas Scheid, Steffen Lemke""" homePage = 'https://github.com/nf-core/mhcquant' description = """Identify and quantify peptides from mass spectrometry raw data""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '2.4.1dev' + nextflowVersion = '!>=23.04.0' + version = '2.5.0' doi = '10.1021/acs.jproteome.9b00313' } +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { diff --git a/nextflow_schema.json b/nextflow_schema.json index ddbf4152..31135be5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -17,6 +17,7 @@ "description": "Input raw / mzML files listed in a tsv file (see help for details)", "help_text": "Use this to specify a sample sheet table including your input raw or mzml files as well as their meta information such as SampleID and Condition. For example:\n\n| ID | Sample | Condition | ReplicateFileName |\n| -----|:------------:| ----------:|------------------------------------------:|\n| 1 | MM15_Melanom | A | data/MM15_Melanom_W_1_A_standard.raw |\n| 2 | MM15_Melanom | B | data/MM15_Melanom_W_1_B_standard.raw |\n| 3 | MM17_Melanom | B | data/MM17_Melanom_W_1_B_standard.raw |\n\n```bash\n--input 'path/samples.tsv'\n```", "format": "file-path", + "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.tsv$", "schema": "assets/schema_input.json", @@ -243,11 +244,21 @@ "default": "high_res", "fa_icon": "fas fa-wrench", "description": "Comets theoretical_fragment_ions parameter: theoretical fragment ion peak representation, high-res: sum of intensities plus flanking bins, ion trap (low-res) ms/ms: sum of intensities of central M bin only" + }, + "default_params_file_comet": { + "type": "string", + "fa_icon": "fas fa-file-code", + "description": "Default Comet params file. All parameters of this take precedence." + }, + "filter_mzml": { + "type": "boolean", + "fa_icon": "fas fa-file-code", + "description": "Clean up mzml files and remove artificial charge 0 peptides." 
} } }, - "fdr_scoring": { - "title": "FDR Scoring", + "rescoring": { + "title": "Rescoring", "type": "object", "fa_icon": "fas fa-star-half-stroke", "description": "", @@ -294,6 +305,49 @@ "type": "integer", "fa_icon": "fas fa-train-track", "description": "Maximum subset for percolator training iterations" + }, + "use_deeplc": { + "type": "boolean", + "fa_icon": "fas fa-microchip", + "description": "Use DeepLC retention time features for Percolator rescoring", + "help_text": "https://www.nature.com/articles/s41592-021-01301-5" + }, + "deeplc_calibration_bins": { + "type": "integer", + "fa_icon": "fas fa-train-track", + "description": "Number of bins (peptides) used for DeepLC calibration. For each bin the best hit is used." + }, + "deeplc_calibration_mode": { + "type": "string", + "fa_icon": "fas fa-train-track", + "description": "Specify the DeepLC calibration mode. rt_bin: bin peptides by RT, idx_bin: bin peptides by index, min_max: scale uncalibrated predictions to experimental RT range", + "enum": ["rt_bin", "idx_bin", "min_max"] + }, + "deeplc_add_abs_rt_error": { + "type": "boolean", + "fa_icon": "fas fa-train-track", + "description": "Add the absolute RT error between experimental and predicted RT to the feature set" + }, + "deeplc_add_sqr_rt_error": { + "type": "boolean", + "fa_icon": "fas fa-train-track", + "description": "Add the squared RT error between experimental and predicted RT to the feature set" + }, + "deeplc_add_log_rt_error": { + "type": "boolean", + "fa_icon": "fas fa-train-track", + "description": "Add the log RT error between experimental and predicted RT to the feature set" + }, + "use_ms2pip": { + "type": "boolean", + "fa_icon": "fas fa-microchip", + "description": "Use MS2pip peak intensity prediction for Percolator rescoring", + "help_text": "https://github.com/compomics/ms2pip" + }, + "ms2pip_model_name": { + "type": "string", + "fa_icon": "fas fa-train-track", + "description": "MS2pip model name, as defined in the MS2pip documentation (https://github.com/compomics/ms2pip#specialized-prediction-models)" } } }, @@ -310,7 +364,7 @@ "description": "Skip quantification and only yield peptide identifications" }, "quantification_fdr": { - "type": "string", + "type": "boolean", "fa_icon": "fas fa-less-than", "description": "Compute FDR for the targeted approach", "help_text": "(Weisser H. and Choudhary J.S. J Proteome Res. 
2017 Aug 4)" @@ -318,6 +372,26 @@ "quantification_min_prob": { "type": "number", "description": "Specify a cut off probability value for quantification events as a filter" + }, + "quantification_mz_window": { + "type": "number", + "description": "Specify a m/z window for matching between runs" + }, + "quantification_rt_window": { + "type": "number", + "description": "Specify a rt window for matching between runs" + }, + "quantification_mapping_tolerance": { + "type": "number", + "description": "Specify a rt mapping tolerance for mapping features between runs" + }, + "quantification_peak_width": { + "type": "number", + "description": "Specify a peak width for feature extraction" + }, + "quantification_min_peak_width": { + "type": "number", + "description": "Specify a minimum peak width for quantification" } } }, @@ -387,20 +461,6 @@ } } }, - "rt_prediction": { - "title": "RT Prediction", - "type": "object", - "fa_icon": "fas fa-timeline", - "description": "", - "default": "", - "properties": { - "predict_RT": { - "type": "boolean", - "fa_icon": "fas fa-wrench", - "description": "Set this option to predict retention times of all identified peptides and possible neoepitopes based on high scoring ids" - } - } - }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -485,7 +545,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^[\\d\\.]+\\.*(s|m|h|d|day)$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -556,6 +616,7 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true @@ -571,13 +632,6 @@ "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -585,12 +639,26 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient more.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
} } } @@ -609,7 +677,7 @@ "$ref": "#/definitions/mass_spectrometry_data_processing" }, { - "$ref": "#/definitions/fdr_scoring" + "$ref": "#/definitions/rescoring" }, { "$ref": "#/definitions/quantification_options" @@ -620,9 +688,6 @@ { "$ref": "#/definitions/variant_options" }, - { - "$ref": "#/definitions/rt_prediction" - }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 2dfa1fd9..087d71c0 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -13,10 +13,10 @@ workflow INPUT_CHECK { .csv .splitCsv ( header:true, sep:'\t' ) .map { create_ms_channel(it) } - .set { reads } + .set { ms_runs } emit: - reads // channel: [ val(meta), [ reads ] ] + ms_runs // channel: [ val(meta), [ runs ] ] versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/map_alignment.nf b/subworkflows/local/map_alignment.nf index e038f7e7..90efc013 100644 --- a/subworkflows/local/map_alignment.nf +++ b/subworkflows/local/map_alignment.nf @@ -1,9 +1,6 @@ /* - * Perform the quantification of the samples when the parameter --skip_quantification is not provided + * Align retention times of runs to be able to quantify them. */ - -include { OPENMS_FALSEDISCOVERYRATE } from '../../modules/local/openms_falsediscoveryrate' -include { OPENMS_IDFILTER as OPENMS_IDFILTER_FOR_ALIGNMENT } from '../../modules/local/openms_idfilter' include { OPENMS_MAPALIGNERIDENTIFICATION } from '../../modules/local/openms_mapaligneridentification' include { OPENMS_MAPRTTRANSFORMER as OPENMS_MAPRTTRANSFORMERMZML @@ -12,58 +9,44 @@ include { workflow MAP_ALIGNMENT { take: - indexed_hits - mzml_files + runs_to_be_aligned + mzml + merge_meta_map main: ch_versions = Channel.empty() - // Calculate fdr for id based alignment - OPENMS_FALSEDISCOVERYRATE(indexed_hits) - ch_versions = ch_versions.mix(OPENMS_FALSEDISCOVERYRATE.out.versions.first().ifEmpty(null)) - // Filter fdr for id based alignment - OPENMS_IDFILTER_FOR_ALIGNMENT(OPENMS_FALSEDISCOVERYRATE.out.idxml - .flatMap { it -> [tuple(it[0], it[1], null)]}) - ch_versions = ch_versions.mix(OPENMS_IDFILTER_FOR_ALIGNMENT.out.versions.first().ifEmpty(null)) - // Group samples together if they are replicates - ch_grouped_fdr_filtered = OPENMS_IDFILTER_FOR_ALIGNMENT.out.idxml - .map { - meta, raw -> - [[id:meta.sample + "_" + meta.condition, sample:meta.sample, condition:meta.condition, ext:meta.ext], raw] - } - .groupTuple(by: [0]) - // Compute alignment rt transformation - OPENMS_MAPALIGNERIDENTIFICATION(ch_grouped_fdr_filtered) + // Compute group-wise alignment rt transformation + OPENMS_MAPALIGNERIDENTIFICATION( runs_to_be_aligned ) ch_versions = ch_versions.mix(OPENMS_MAPALIGNERIDENTIFICATION.out.versions.first().ifEmpty(null)) - // Obtain the unique files that were present for the combined data - joined_trafos = OPENMS_MAPALIGNERIDENTIFICATION.out.trafoxml + + // Join run specific trafoXMLs with meta information + merge_meta_map + .join( OPENMS_MAPALIGNERIDENTIFICATION.out.trafoxml ) + .map { groupMeta, meta, trafoxml -> [meta, trafoxml] } .transpose() - .flatMap { - meta, trafoxml -> - ident = trafoxml.baseName.split('_-_')[0] - [[[id:ident, sample:meta.sample, condition:meta.condition, ext:meta.ext], trafoxml]] - } - // Intermediate step to join RT transformation files with mzml channels - joined_trafos_mzmls = mzml_files.join(joined_trafos) - // Intermediate step to join RT transformation files with idxml channels - 
joined_trafos_ids = indexed_hits.join(joined_trafos) + .set { joined_trafos } + + // Intermediate step to join RT transformation files with mzml channels -> [meta, mzml, trafoxml] + joined_trafos_mzmls = mzml.join(joined_trafos) + + // Intermediate step to join RT transformation files with idxml channels -> [meta, idxml, trafoxml] + runs_to_be_aligned + .join( merge_meta_map ) + .map { group_meta, idxml, meta -> [meta, idxml] } + .transpose() + .join( joined_trafos ) + .set { joined_trafos_ids } + // Align mzML files using trafoXMLs OPENMS_MAPRTTRANSFORMERMZML(joined_trafos_mzmls) ch_versions = ch_versions.mix(OPENMS_MAPRTTRANSFORMERMZML.out.versions.first().ifEmpty(null)) - // Align unfiltered idXMLfiles using trafoXMLs + // Align idXML files using trafoXMLs OPENMS_MAPRTTRANSFORMERIDXML(joined_trafos_ids) ch_versions = ch_versions.mix(OPENMS_MAPRTTRANSFORMERIDXML.out.versions.first().ifEmpty(null)) - ch_proceeding_idx = OPENMS_MAPRTTRANSFORMERIDXML.out.aligned - .map { - meta, raw -> - [[id:meta.sample + "_" + meta.condition, sample:meta.sample, condition:meta.condition, ext:meta.ext], raw] - } - .groupTuple(by: [0]) emit: - // Define the information that is returned by this workflow versions = ch_versions - ch_proceeding_idx - aligned_idfilter = OPENMS_IDFILTER_FOR_ALIGNMENT.out.idxml + aligned_idxml = OPENMS_MAPRTTRANSFORMERIDXML.out.aligned aligned_mzml = OPENMS_MAPRTTRANSFORMERMZML.out.aligned } diff --git a/subworkflows/local/predict_class1.nf b/subworkflows/local/predict_class1.nf index 70532717..948b8f91 100644 --- a/subworkflows/local/predict_class1.nf +++ b/subworkflows/local/predict_class1.nf @@ -16,18 +16,15 @@ workflow PREDICT_CLASS1 { main: ch_versions = Channel.empty() ch_predicted_possible_neoepitopes = Channel.empty() + alleles = peptides_class_1_alleles.map{ meta, alleles -> [[id:meta], alleles] } // If specified predict peptides using MHCFlurry - MHCFLURRY_PREDICTPEPTIDESCLASS1( - mztab - .map{ it -> [it[0].sample, it[0], it[1]] } - .combine( peptides_class_1_alleles, by:0) - .map( it -> [it[1], it[2], it[3]]) - ) + MHCFLURRY_PREDICTPEPTIDESCLASS1(mztab.join(alleles)) ch_versions = ch_versions.mix(MHCFLURRY_PREDICTPEPTIDESCLASS1.out.versions.first().ifEmpty(null)) + if ( params.include_proteins_from_vcf ) { // Predict all possible neoepitopes from vcf - PREDICT_POSSIBLE_CLASS1_NEOEPITOPES(peptides_class_1_alleles.combine(ch_vcf_from_sheet, by:0)) + PREDICT_POSSIBLE_CLASS1_NEOEPITOPES(alleles.combine(ch_vcf_from_sheet, by:0)) ch_versions = ch_versions.mix(PREDICT_POSSIBLE_CLASS1_NEOEPITOPES.out.versions.first().ifEmpty(null)) ch_predicted_possible_neoepitopes = PREDICT_POSSIBLE_CLASS1_NEOEPITOPES.out.csv // Resolve found neoepitopes @@ -39,7 +36,7 @@ workflow PREDICT_CLASS1 { ) ch_versions = ch_versions.mix(RESOLVE_FOUND_CLASS1_NEOEPITOPES.out.versions.first().ifEmpty(null)) // Predict class 1 neoepitopes MHCFlurry - MHCFLURRY_PREDICTNEOEPITOPESCLASS1(peptides_class_1_alleles.join(RESOLVE_FOUND_CLASS1_NEOEPITOPES.out.csv, by:0)) + MHCFLURRY_PREDICTNEOEPITOPESCLASS1(alleles.join(RESOLVE_FOUND_CLASS1_NEOEPITOPES.out.csv, by:0)) ch_versions = ch_versions.mix(MHCFLURRY_PREDICTNEOEPITOPESCLASS1.out.versions.first().ifEmpty(null)) } diff --git a/subworkflows/local/predict_class2.nf b/subworkflows/local/predict_class2.nf index 4baa282d..41f3c7cd 100644 --- a/subworkflows/local/predict_class2.nf +++ b/subworkflows/local/predict_class2.nf @@ -20,16 +20,16 @@ workflow PREDICT_CLASS2 { main: ch_versions = Channel.empty() ch_predicted_possible_neoepitopes = Channel.empty() + alleles = 
peptides_class_2_alleles.map{meta, alleles -> [[id:meta], alleles]} // Preprocess found peptides for MHCNuggets prediction class 2 MHCNUGGETS_PEPTIDESCLASS2PRE(mztab) ch_versions = ch_versions.mix(MHCNUGGETS_PEPTIDESCLASS2PRE.out.versions.first().ifEmpty(null)) + // Predict found peptides using MHCNuggets class 2 MHCNUGGETS_PREDICTPEPTIDESCLASS2( MHCNUGGETS_PEPTIDESCLASS2PRE.out.preprocessed - .map{ it -> [it[0].sample, it[0], it[1]] } - .join(peptides_class_2_alleles, by:0) - .map( it -> [it[1], it[2], it[3]]) + .join(alleles) ) ch_versions = ch_versions.mix(MHCNUGGETS_PREDICTPEPTIDESCLASS2.out.versions.first().ifEmpty(null)) // Postprocess predicted MHCNuggets peptides class 2 @@ -37,7 +37,7 @@ workflow PREDICT_CLASS2 { ch_versions = ch_versions.mix(MHCNUGGETS_PEPTIDESCLASS2POST.out.versions.first().ifEmpty(null)) if ( params.include_proteins_from_vcf ) { // Predict all possible class 2 neoepitopes from vcf - PREDICT_POSSIBLE_CLASS2_NEOEPITOPES(peptides_class_2_alleles.combine(ch_vcf_from_sheet, by:0)) + PREDICT_POSSIBLE_CLASS2_NEOEPITOPES(alleles.combine(ch_vcf_from_sheet, by:0)) ch_versions = ch_versions.mix(PREDICT_POSSIBLE_CLASS2_NEOEPITOPES.out.versions.first().ifEmpty(null)) ch_predicted_possible_neoepitopes = PREDICT_POSSIBLE_CLASS2_NEOEPITOPES.out.csv // Resolve found class 2 neoepitopes @@ -51,7 +51,7 @@ workflow PREDICT_CLASS2 { MHCNUGGETS_NEOEPITOPESCLASS2PRE(RESOLVE_FOUND_CLASS2_NEOEPITOPES.out.csv) ch_versions = ch_versions.mix(MHCNUGGETS_NEOEPITOPESCLASS2PRE.out.versions.first().ifEmpty(null)) // Predict class 2 MHCNuggets - MHCNUGGETS_PREDICTNEOEPITOPESCLASS2(MHCNUGGETS_NEOEPITOPESCLASS2PRE.out.preprocessed.join(peptides_class_2_alleles, by:0)) + MHCNUGGETS_PREDICTNEOEPITOPESCLASS2(MHCNUGGETS_NEOEPITOPESCLASS2PRE.out.preprocessed.join(alleles, by:0)) ch_versions = ch_versions.mix(MHCNUGGETS_PREDICTNEOEPITOPESCLASS2.out.versions.first().ifEmpty(null)) // Class 2 MHCNuggets Postprocessing MHCNUGGETS_NEOEPITOPESCLASS2POST(RESOLVE_FOUND_CLASS2_NEOEPITOPES.out.csv.join(MHCNUGGETS_PREDICTNEOEPITOPESCLASS2.out.csv, by:0)) diff --git a/subworkflows/local/predict_rt.nf b/subworkflows/local/predict_rt.nf deleted file mode 100644 index 894eddf9..00000000 --- a/subworkflows/local/predict_rt.nf +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Perform the Retention time prediction when the parameter --predict_RT is provided - */ - -include { OPENMS_RTMODEL } from '../../modules/local/openms_rtmodel' -include { - OPENMS_RTPREDICT as OPENMS_RTPREDICT_FOUND_PEPTIDES - OPENMS_RTPREDICT as OPENMS_RTPREDICT_NEOEPITOPES} from '../../modules/local/openms_rtpredict' - - -workflow PREDICT_RT { - take: - filter_q_value - ch_predicted_possible_neoepitopes - ch_predicted_possible_neoepitopes_II - - main: - ch_versions = Channel.empty() - - // Train Retention Times Predictor - OPENMS_RTMODEL(filter_q_value) - ch_versions = ch_versions.mix(OPENMS_RTMODEL.out.versions.first().ifEmpty(null)) - // Retention Times Predictor Found Peptides - OPENMS_RTPREDICT_FOUND_PEPTIDES(filter_q_value.join(OPENMS_RTMODEL.out.complete, by:[0])) - ch_versions = ch_versions.mix(OPENMS_RTPREDICT_FOUND_PEPTIDES.out.versions.first().ifEmpty(null)) - // Retention Times Predictor possible Neoepitopes - OPENMS_RTPREDICT_NEOEPITOPES(ch_predicted_possible_neoepitopes.mix(ch_predicted_possible_neoepitopes_II).join(OPENMS_RTMODEL.out.complete, by:[0])) - ch_versions = ch_versions.mix(OPENMS_RTPREDICT_FOUND_PEPTIDES.out.versions.first().ifEmpty(null)) - - emit: - // Define the information that is returned by this workflow - versions = 
ch_versions -} diff --git a/subworkflows/local/process_feature.nf b/subworkflows/local/process_feature.nf index cc8e3dfd..176861c7 100644 --- a/subworkflows/local/process_feature.nf +++ b/subworkflows/local/process_feature.nf @@ -1,51 +1,36 @@ /* - * Perform the quantification of the samples when the parameter --skip_quantification is not provided + * Perform the quantification by extracting the feature intensities and grouping runs corresponding to the same sample and condition. */ +include { OPENMS_IDMERGER } from '../../modules/local/openms_idmerger' include { OPENMS_FEATUREFINDERIDENTIFICATION } from '../../modules/local/openms_featurefinderidentification' include { OPENMS_FEATURELINKERUNLABELEDKD } from '../../modules/local/openms_featurelinkerunlabeledkd' include { OPENMS_IDCONFLICTRESOLVER } from '../../modules/local/openms_idconflictresolver' -include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_QUANTIFIED } from '../../modules/local/openms_textexporter' -include { OPENMS_MZTABEXPORTER as OPENMS_MZTABEXPORTER_QUANT } from '../../modules/local/openms_mztabexporter' workflow PROCESS_FEATURE { take: - psms_outcome - aligned_mzml - filter_q_value + ch_runs_to_be_quantified main: ch_versions = Channel.empty() - // Combining the necessary information into one channel - psms_outcome - .join( aligned_mzml, by: [0] ) - .map { it -> [it[0].sample, it[0], it[1], it[2]] } - .combine( filter_q_value , by: [0] ) - .map { it -> [it[1], it[2], it[3], it[5]] } - .set{ joined_mzmls_ids_quant } + // Quantify identifications using targeted feature extraction - OPENMS_FEATUREFINDERIDENTIFICATION(joined_mzmls_ids_quant) + OPENMS_FEATUREFINDERIDENTIFICATION(ch_runs_to_be_quantified).featurexml + .map { meta, featurexml -> [[id: meta.sample + '_' + meta.condition], featurexml] } + .groupTuple() + .set { ch_features_grouped } ch_versions = ch_versions.mix(OPENMS_FEATUREFINDERIDENTIFICATION.out.versions.first().ifEmpty(null)) + // Link extracted features - OPENMS_FEATURELINKERUNLABELEDKD( - OPENMS_FEATUREFINDERIDENTIFICATION.out.featurexml - .flatMap { - meta, raw -> - [[[id:meta.sample + "_" + meta.condition, sample:meta.sample, condition:meta.condition, ext:meta.ext], raw]] - } - .groupTuple(by:[0])) + OPENMS_FEATURELINKERUNLABELEDKD(ch_features_grouped) ch_versions = ch_versions.mix(OPENMS_FEATURELINKERUNLABELEDKD.out.versions.first().ifEmpty(null)) + // Resolve conflicting ids matching to the same feature OPENMS_IDCONFLICTRESOLVER(OPENMS_FEATURELINKERUNLABELEDKD.out.consensusxml) ch_versions = ch_versions.mix(OPENMS_IDCONFLICTRESOLVER.out.versions.first().ifEmpty(null)) - // Export all information as text to csv - OPENMS_TEXTEXPORTER_QUANTIFIED(OPENMS_IDCONFLICTRESOLVER.out.consensusxml) - ch_versions = ch_versions.mix(OPENMS_TEXTEXPORTER_QUANTIFIED.out.versions.first().ifEmpty(null)) - // Export all information as mzTab - OPENMS_MZTABEXPORTER_QUANT(OPENMS_IDCONFLICTRESOLVER.out.consensusxml) - ch_versions = ch_versions.mix(OPENMS_MZTABEXPORTER_QUANT.out.versions.first().ifEmpty(null)) + emit: // Define the information that is returned by this workflow versions = ch_versions - mztab = OPENMS_MZTABEXPORTER_QUANT.out.mztab + consensusxml = OPENMS_IDCONFLICTRESOLVER.out.consensusxml } diff --git a/subworkflows/local/quant.nf b/subworkflows/local/quant.nf new file mode 100644 index 00000000..64c3d41d --- /dev/null +++ b/subworkflows/local/quant.nf @@ -0,0 +1,84 @@ +/* + * Perform the quantification of the samples when the parameter --skip_quantification is not provided + * This workflow splits the merged 
percolator output into the individual runs and filters them based on the q-value + * It then aligns the retention times of the runs and merges the idXML files together to use them as id_ext in featurefinder + * Finally, it performs the quantification and emits the consensusXML file + */ +include { OPENMS_IDRIPPER } from '../../modules/local/openms_idripper' +include { OPENMS_IDSCORESWITCHER } from '../../modules/local/openms_idscoreswitcher' +include { PYOPENMS_IDFILTER } from '../../modules/local/pyopenms_idfilter' +include { OPENMS_IDMERGER as OPENMS_IDMERGER_QUANT } from '../../modules/local/openms_idmerger' + +include { MAP_ALIGNMENT } from './map_alignment' +include { PROCESS_FEATURE } from './process_feature' + +// Sort closure for merging and splitting files +def sortById = { a, b -> a.id <=> b.id } + +workflow QUANT { + take: + merge_meta_map + merged_pout + filter_q_value + mzml + + main: + ch_versions = Channel.empty() + // Rip post-percolator idXML files and manipulate them such that we end up with [meta_run1, idxml_run1, pout_filtered] [meta_run2, idxml_run2, pout_filtered] ... + OPENMS_IDRIPPER( merged_pout ).ripped + .join( merge_meta_map ) + .join( filter_q_value ) + .map { group_meta, ripped, meta, fdrfiltered -> [meta, ripped, fdrfiltered] } + .transpose() + .set { ch_ripped_pout } + ch_versions = ch_versions.mix(OPENMS_IDRIPPER.out.versions.ifEmpty(null)) + + // Switch to xcorr for filtering since q-values are set to 1 with peptide-level-fdr + if (params.fdr_level == 'peptide_level_fdrs'){ + ch_runs_to_be_filtered = OPENMS_IDSCORESWITCHER( ch_ripped_pout ).switched_idxml + ch_versions = ch_versions.mix(OPENMS_IDSCORESWITCHER.out.versions.ifEmpty(null)) + } else { + ch_runs_to_be_filtered = ch_ripped_pout + } + + // Filter runs based on the FDR-filtered, co-processed percolator output. 
+ // NOTE: This is an alternative filtering method that will be replaced by IDFilter with new release of OpenMS + PYOPENMS_IDFILTER( ch_runs_to_be_filtered ).filtered + .map { meta, idxml -> [[id:meta.sample + '_' + meta.condition], [id:meta.id, file:idxml]] } + .groupTuple( sort: sortById ) + .map { meta, idxml -> [meta, idxml.file] } + .set { ch_runs_to_be_aligned } + ch_versions = ch_versions.mix(PYOPENMS_IDFILTER.out.versions.ifEmpty(null)) + + // Align retention times of runs + MAP_ALIGNMENT( + ch_runs_to_be_aligned, + mzml, + merge_meta_map + ) + ch_versions = ch_versions.mix( MAP_ALIGNMENT.out.versions.ifEmpty(null) ) + + // We need to merge groupwise the aligned idxml files together to use them as id_ext in featurefinder + OPENMS_IDMERGER_QUANT( MAP_ALIGNMENT.out.aligned_idxml + .map { meta, aligned_idxml -> [[id: meta.sample + '_' + meta.condition], aligned_idxml] } + .groupTuple()) + ch_versions = ch_versions.mix(OPENMS_IDMERGER_QUANT.out.versions.ifEmpty(null)) + + // Manipulate channels such that we end up with : [meta, mzml, run_idxml, merged_runs_idxml] + MAP_ALIGNMENT.out.aligned_mzml + .join( MAP_ALIGNMENT.out.aligned_idxml ) + .map { meta, mzml, idxml -> [[id: meta.sample + '_' + meta.condition], meta, [id:meta.id, file:mzml], [id:meta.id, file:idxml]] } + .groupTuple( sort: sortById ) + .map { group_meta, meta, mzml, idxml -> [group_meta, meta, mzml.file, idxml.file] } + .join( OPENMS_IDMERGER_QUANT.out.idxml ) + .map { group_meta, meta, mzml, idxml, merged_idxml -> [meta, mzml, idxml, merged_idxml] } + .transpose() + .set { ch_runs_to_be_quantified } + + PROCESS_FEATURE ( ch_runs_to_be_quantified ) + ch_versions = ch_versions.mix(PROCESS_FEATURE.out.versions.ifEmpty(null)) + + emit: + consensusxml = PROCESS_FEATURE.out.consensusxml + versions = ch_versions +} diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..787aedfe --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/workflows/mhcquant.nf b/workflows/mhcquant.nf index a32e0063..787caa60 100644 --- a/workflows/mhcquant.nf +++ b/workflows/mhcquant.nf @@ -1,11 +1,18 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) -// Validate input parameters +include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' + +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) + +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation + WorkflowMhcquant.initialise(params, log) // Input/output options @@ -59,21 +66,25 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil // MODULE: Loaded from modules/local/ // include { OPENMS_DECOYDATABASE } from '../modules/local/openms_decoydatabase' -include { OPENMS_THERMORAWFILEPARSER } from '../modules/local/openms_thermorawfileparser' +include { THERMORAWFILEPARSER } from '../modules/local/thermorawfileparser' +include { TDF2MZML } from '../modules/local/tdf2mzml' include { OPENMS_PEAKPICKERHIRES } from '../modules/local/openms_peakpickerhires' +include { OPENMS_FILEFILTER } from '../modules/local/openms_filefilter' include { OPENMS_COMETADAPTER } from '../modules/local/openms_cometadapter' include { OPENMS_PEPTIDEINDEXER } from '../modules/local/openms_peptideindexer' - -include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_COMET } from '../modules/local/openms_textexporter' +include { DEEPLC } from '../modules/local/deeplc' +include { MS2PIP } from '../modules/local/ms2pip' include { OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE } from '../modules/local/openms_idfilter' include { OPENMS_IDMERGER } from '../modules/local/openms_idmerger' + include { OPENMS_PSMFEATUREEXTRACTOR } from '../modules/local/openms_psmfeatureextractor' include { OPENMS_PERCOLATORADAPTER } from '../modules/local/openms_percolatoradapter' include { PYOPENMS_IONANNOTATOR } from '../modules/local/pyopenms_ionannotator' -include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_FDR } from '../modules/local/openms_textexporter' -include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_UNQUANTIFIED } from '../modules/local/openms_textexporter' +include { OPENMS_TEXTEXPORTER } from '../modules/local/openms_textexporter' +include { OPENMS_MZTABEXPORTER } from '../modules/local/openms_mztabexporter' + // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -100,14 +111,14 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft // Info required for completion email and summary def multiqc_report = [] +// Sort closure for merging and splitting files +def sortById = { a, b -> a.id <=> b.id } include { INCLUDE_PROTEINS } from '../subworkflows/local/include_proteins' -include { MAP_ALIGNMENT } from '../subworkflows/local/map_alignment' include { REFINE_FDR } from '../subworkflows/local/refine_fdr' -include { PROCESS_FEATURE } from '../subworkflows/local/process_feature.nf' +include { QUANT } from '../subworkflows/local/quant' include { PREDICT_CLASS1 } from '../subworkflows/local/predict_class1' include { PREDICT_CLASS2 } from '../subworkflows/local/predict_class2' -include { PREDICT_RT } from '../subworkflows/local/predict_rt' //////////////////////////////////////////////////// /* -- RUN MAIN WORKFLOW -- */ @@ -119,25 +130,30 @@ workflow MHCQUANT { // // SUBWORKFLOW: Check the input file // - INPUT_CHECK(params.input) - .reads - .set { ch_samples_from_sheet } + INPUT_CHECK ( + file(params.input) + ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") + // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ + // ! 
There is currently no tooling to help you write a sample sheet schema - ch_samples_from_sheet + INPUT_CHECK.out.ms_runs .branch { meta, filename -> raw : meta.ext == 'raw' - return [ meta, filename ] + return [ meta.subMap('id', 'sample', 'condition'), filename ] mzml : meta.ext == 'mzml' - return [ meta, filename ] + return [ meta.subMap('id', 'sample', 'condition'), filename ] + tdf : meta.ext == 'd' + return [ meta.subMap('id', 'sample', 'condition'), filename ] other : true } - .set { ms_files } + .set { branched_ms_files } // Input fasta file Channel.fromPath(params.fasta) - .combine(ch_samples_from_sheet) - .flatMap{ it -> [tuple(it[1],it[0])] } + .combine(INPUT_CHECK.out.ms_runs) + .map{ fasta, meta, ms_file -> [meta.subMap('id', 'sample', 'condition'), fasta] } .ifEmpty { exit 1, "params.fasta was empty - no input file supplied" } .set { input_fasta } @@ -164,14 +180,20 @@ workflow MHCQUANT { ch_decoy_db = ch_fasta_file } + // If mzml files are specified, they are encapsulated in a list [meta, [mzml]]. We need to extract the path for grouping later + ch_ms_files = branched_ms_files.mzml.map{ meta, mzml -> [meta, mzml[0]]} // Raw file conversion - OPENMS_THERMORAWFILEPARSER(ms_files.raw) - ch_versions = ch_versions.mix(OPENMS_THERMORAWFILEPARSER.out.versions.ifEmpty(null)) - // Define the ch_ms_files channels to combine the mzml files - ch_ms_files = OPENMS_THERMORAWFILEPARSER.out.mzml.mix(ms_files.mzml.map{ it -> [it[0], it[1][0]] }) + THERMORAWFILEPARSER(branched_ms_files.raw) + ch_versions = ch_versions.mix(THERMORAWFILEPARSER.out.versions.ifEmpty(null)) + ch_ms_files = ch_ms_files.mix(THERMORAWFILEPARSER.out.mzml) + // timsTOF data conversion + TDF2MZML(branched_ms_files.tdf) + ch_versions = ch_versions.mix(TDF2MZML.out.versions.ifEmpty(null)) + ch_ms_files = ch_ms_files.mix(TDF2MZML.out.mzml) + + // Optional: Run Peak Picking as Preprocessing if (params.run_centroidisation) { - // Optional: Run Peak Picking as Preprocessing OPENMS_PEAKPICKERHIRES(ch_ms_files) ch_versions = ch_versions.mix(OPENMS_PEAKPICKERHIRES.out.versions.ifEmpty(null)) ch_mzml_file = OPENMS_PEAKPICKERHIRES.out.mzml @@ -179,55 +201,66 @@ workflow MHCQUANT { ch_mzml_file = ch_ms_files } + // Optionally clean up mzML files + if (params.filter_mzml){ + OPENMS_FILEFILTER(ch_mzml_file) + ch_versions = ch_versions.mix(OPENMS_FILEFILTER.out.versions.ifEmpty(null)) + ch_clean_mzml_file = OPENMS_FILEFILTER.out.cleaned_mzml + } else { + ch_clean_mzml_file = ch_mzml_file + } + // Run comet database search - OPENMS_COMETADAPTER( - ch_mzml_file.join(ch_decoy_db, remainder:true)) - // Write this information to an tsv file - OPENMS_TEXTEXPORTER_COMET(OPENMS_COMETADAPTER.out.idxml) - ch_versions = ch_versions.mix(OPENMS_COMETADAPTER.out.versions.ifEmpty(null)) - // Index decoy and target hits - OPENMS_PEPTIDEINDEXER(OPENMS_COMETADAPTER.out.idxml.join(ch_decoy_db)) - ch_versions = ch_versions.mix(OPENMS_PEPTIDEINDEXER.out.versions.ifEmpty(null)) + OPENMS_COMETADAPTER(ch_clean_mzml_file.join(ch_decoy_db, remainder:true)) - // - // SUBWORKFLOW: Pre-process step for the quantification of the data - // - if (!params.skip_quantification) { - MAP_ALIGNMENT( - OPENMS_PEPTIDEINDEXER.out.idxml, - ch_mzml_file - ) - ch_proceeding_idx = MAP_ALIGNMENT.out.ch_proceeding_idx - ch_versions = ch_versions.mix(MAP_ALIGNMENT.out.versions.ifEmpty(null)) + // Run DeepLC if specified + if (params.use_deeplc){ + DEEPLC(OPENMS_COMETADAPTER.out.idxml) + ch_versions = ch_versions.mix(DEEPLC.out.versions.ifEmpty(null)) + ch_comet_out_idxml = 
DEEPLC.out.idxml + } else { + ch_comet_out_idxml = OPENMS_COMETADAPTER.out.idxml + } + + // Run MS2PIP if specified + if (params.use_ms2pip){ + MS2PIP(ch_comet_out_idxml.join(ch_clean_mzml_file)) + ch_versions = ch_versions.mix(MS2PIP.out.versions.ifEmpty(null)) + ch_comet_out_idxml_proceeding = MS2PIP.out.idxml } else { - ch_proceeding_idx = OPENMS_PEPTIDEINDEXER.out.idxml - .map { - meta, raw -> - [[id:meta.sample + "_" + meta.condition, sample:meta.sample, condition:meta.condition, ext:meta.ext], raw] - } - .groupTuple(by: [0]) + ch_comet_out_idxml_proceeding = ch_comet_out_idxml } + // Index decoy and target hits + OPENMS_PEPTIDEINDEXER(ch_comet_out_idxml_proceeding.join(ch_decoy_db)) + ch_versions = ch_versions.mix(OPENMS_PEPTIDEINDEXER.out.versions.ifEmpty(null)) + + // Save indexed runs for later use to keep meta-run information. Sort based on file id + OPENMS_PEPTIDEINDEXER.out.idxml + .map { meta, idxml -> [[id: meta.sample + '_' + meta.condition], meta] } + .groupTuple( sort: sortById ) + .set { merge_meta_map } + + OPENMS_PEPTIDEINDEXER.out.idxml + .map { meta, idxml -> [[id: meta.sample + '_' + meta.condition], idxml] } + .groupTuple() + .set { ch_runs_to_merge } + // Merge aligned idXMLfiles - OPENMS_IDMERGER(ch_proceeding_idx) + OPENMS_IDMERGER(ch_runs_to_merge) ch_versions = ch_versions.mix(OPENMS_IDMERGER.out.versions.ifEmpty(null)) + // Extract PSM features for Percolator OPENMS_PSMFEATUREEXTRACTOR(OPENMS_IDMERGER.out.idxml) ch_versions = ch_versions.mix(OPENMS_PSMFEATUREEXTRACTOR.out.versions.ifEmpty(null)) + // Run Percolator OPENMS_PERCOLATORADAPTER(OPENMS_PSMFEATUREEXTRACTOR.out.idxml) ch_versions = ch_versions.mix(OPENMS_PERCOLATORADAPTER.out.versions.ifEmpty(null)) + // Filter by percolator q-value OPENMS_IDFILTER_Q_VALUE(OPENMS_PERCOLATORADAPTER.out.idxml.flatMap { it -> [tuple(it[0], it[1], null)] }) ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions.ifEmpty(null)) - // Prepare for check if file is empty - OPENMS_TEXTEXPORTER_FDR(OPENMS_IDFILTER_Q_VALUE.out.idxml) - // Return an error message when there is only a header present in the document - OPENMS_TEXTEXPORTER_FDR.out.tsv.map { - meta, tsv -> if (tsv.size() < 130) { - log.warn "It seems that there were no significant hits found for this sample: " + meta.sample + "\nPlease consider incrementing the '--fdr_threshold' after removing the work directory or to exclude this sample. 
" - } - } // // SUBWORKFLOW: Refine the FDR values on the predicted subset @@ -241,36 +274,45 @@ workflow MHCQUANT { ) ch_versions = ch_versions.mix(REFINE_FDR.out.versions.ifEmpty(null)) // Define the outcome of the paramer to a fixed variable - filter_q_value = REFINE_FDR.out.filter_refined_q_value.flatMap { it -> [ tuple(it[0].sample, it[0], it[1]) ] } + filter_q_value = REFINE_FDR.out.filter_refined_q_value } else { // Make sure that the columns that consists of the ID's, sample names and the idXML file names are returned - filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.idxml.map { it -> [it[0].sample, it[0], it[1]] } + filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.idxml } // - // SUBWORKFLOW: Perform the step to process the feature and obtain the belonging information + // SUBWORKFLOW: QUANT // - if (!params.skip_quantification) { - PROCESS_FEATURE ( - MAP_ALIGNMENT.out.aligned_idfilter, - MAP_ALIGNMENT.out.aligned_mzml, - filter_q_value - ) - ch_versions = ch_versions.mix(PROCESS_FEATURE.out.versions.ifEmpty(null)) + QUANT(merge_meta_map, OPENMS_PERCOLATORADAPTER.out.idxml, filter_q_value, ch_clean_mzml_file) + ch_versions = ch_versions.mix(QUANT.out.versions.ifEmpty(null)) + ch_output = QUANT.out.consensusxml } else { - OPENMS_TEXTEXPORTER_UNQUANTIFIED(filter_q_value.flatMap { ident, meta, idxml -> [[meta, idxml]] }) + ch_output = filter_q_value } + // Prepare for check if file is empty + OPENMS_TEXTEXPORTER(ch_output) + ch_versions = ch_versions.mix(OPENMS_TEXTEXPORTER.out.versions.ifEmpty(null)) + // Return an error message when there is only a header present in the document + OPENMS_TEXTEXPORTER.out.tsv.map { + meta, tsv -> if (tsv.size() < 130) { + log.warn "It seems that there were no significant hits found for this sample: " + meta.sample + "\nPlease consider incrementing the '--fdr_threshold' after removing the work directory or to exclude this sample. 
" + } + } + + OPENMS_MZTABEXPORTER(ch_output) + ch_versions = ch_versions.mix(OPENMS_MZTABEXPORTER.out.versions.ifEmpty(null)) + // // SUBWORKFLOW: Predict class I (neoepitopes) // if (params.predict_class_1 & !params.skip_quantification) { PREDICT_CLASS1 ( - PROCESS_FEATURE.out.mztab, + OPENMS_MZTABEXPORTER.out.mztab, peptides_class_1_alleles, ch_vcf_from_sheet - ) + ) ch_versions = ch_versions.mix(PREDICT_CLASS1.out.versions.ifEmpty(null)) ch_predicted_possible_neoepitopes = PREDICT_CLASS1.out.ch_predicted_possible_neoepitopes } else { @@ -282,7 +324,7 @@ workflow MHCQUANT { // if (params.predict_class_2 & !params.skip_quantification) { PREDICT_CLASS2 ( - PROCESS_FEATURE.out.mztab, + OPENMS_MZTABEXPORTER.out.mztab, peptides_class_2_alleles, ch_vcf_from_sheet ) @@ -292,26 +334,15 @@ workflow MHCQUANT { ch_predicted_possible_neoepitopes_II = Channel.empty() } - // - // SUBWORKFLOW: Predict retention time - // - if (params.predict_RT) { - PREDICT_RT ( - filter_q_value.map{ it -> [it[1], it[2]] }, - ch_predicted_possible_neoepitopes, - ch_predicted_possible_neoepitopes_II - ) - } - if (params.annotate_ions) { - // Alter the annotation of the filtered q value - ch_filtered_idxml = filter_q_value.map { ident, meta, idxml -> [meta.id, idxml] } // Join the ch_filtered_idxml and the ch_mzml_file - ch_raw_spectra_data = ch_mzml_file.map {meta, mzml -> [meta.sample + '_' + meta.condition, mzml] } + ch_clean_mzml_file.map { meta, mzml -> [[id: meta.sample + '_' + meta.condition], mzml] } .groupTuple() - .join(ch_filtered_idxml) + .join(filter_q_value) + .set{ ch_ion_annotator_input } + // Annotate spectra with ion fragmentation information - PYOPENMS_IONANNOTATOR(ch_raw_spectra_data) + PYOPENMS_IONANNOTATOR( ch_ion_annotator_input ) ch_versions = ch_versions.mix(PYOPENMS_IONANNOTATOR.out.versions.ifEmpty(null)) } @@ -329,8 +360,8 @@ workflow MHCQUANT { workflow_summary = WorkflowMhcquant.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowMhcquant.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) - ch_methods_description = Channel.value(methods_description) + methods_description = WorkflowMhcquant.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + ch_methods_description = Channel.value(methods_description) ch_multiqc_files = Channel.empty() ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) @@ -359,6 +390,7 @@ workflow.onComplete { if (params.email || params.email_on_fail) { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } + NfcoreTemplate.dump_parameters(workflow, params) NfcoreTemplate.summary(workflow, params, log) if (params.hook_url) { NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)