From 66de1245b3fa523db02a20a5f8f57bd4204e2d2c Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Thu, 29 Feb 2024 12:00:17 +1300 Subject: [PATCH] Add tag uniqueness checks --- assets/schema_xref_assemblies.json | 2 +- lib/WorkflowAssemblyqc.groovy | 34 ++++++++++++++++++++++++++++++ nextflow.config | 5 +++-- tests/stub/stub.config | 2 +- workflows/assemblyqc.nf | 8 +++++++ 5 files changed, 47 insertions(+), 4 deletions(-) diff --git a/assets/schema_xref_assemblies.json b/assets/schema_xref_assemblies.json index e4a5a40a..af118cf3 100644 --- a/assets/schema_xref_assemblies.json +++ b/assets/schema_xref_assemblies.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/master/assets/schema_input.json", + "$id": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/master/assets/schema_xref_assemblies.json", "title": "plant-food-research-open/assemblyqc pipeline - params.synteny_xref_assemblies schema", "description": "Schema for the file provided with params.synteny_xref_assemblies", "type": "array", diff --git a/lib/WorkflowAssemblyqc.groovy b/lib/WorkflowAssemblyqc.groovy index c410665b..5ec2cc32 100755 --- a/lib/WorkflowAssemblyqc.groovy +++ b/lib/WorkflowAssemblyqc.groovy @@ -48,6 +48,40 @@ class WorkflowAssemblyqc { } } + public static ArrayList validateInput(input) { + def inputFields = 5 + def assemblyTags = input[(0..input.size()-1).step(inputFields)] + + def tagCounts = [:] + assemblyTags.each { tag -> + tagCounts[tag] = tagCounts.containsKey(tag) ? tagCounts[tag] + 1 : 1 + } + def repeatedTags = tagCounts.findAll { key, count -> count > 1 }.collect { key, count -> key } + + if (repeatedTags.size() > 0) { + Nextflow.error("Please check input assemblysheet -> Multiple assemblies have the same tags!: ${repeatedTags}") + } + + return input + } + + public static ArrayList validateXrefAssemblies(xref) { + def xrefFields = 3 + def xrefTags = xref[(0..xref.size()-1).step(xrefFields)] + + def tagCounts = [:] + xrefTags.each { tag -> + tagCounts[tag] = tagCounts.containsKey(tag) ? tagCounts[tag] + 1 : 1 + } + def repeatedTags = tagCounts.findAll { key, count -> count > 1 }.collect { key, count -> key } + + if (repeatedTags.size() > 0) { + Nextflow.error("Please check synteny_xref_assemblies -> Multiple xref assemblies have the same tags!: ${repeatedTags}") + } + + return xref + } + public static String jsonifyParams(params) { return JsonOutput.toJson(params).toString() } diff --git a/nextflow.config b/nextflow.config index 795a3f41..662bab59 100644 --- a/nextflow.config +++ b/nextflow.config @@ -145,10 +145,11 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + // docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + // docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '--platform=linux/amd64' } singularity { singularity.enabled = true diff --git a/tests/stub/stub.config b/tests/stub/stub.config index cf626085..aafdf6e2 100644 --- a/tests/stub/stub.config +++ b/tests/stub/stub.config @@ -25,7 +25,7 @@ params { hic = 'tests/stub/hic/Dummy_hic.R{1,2}.fq.gz' - synteny_skip = false + synteny_skip = true // GitHub action runner runs out of memory synteny_xref_assemblies = 'https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/assets/xrefsheet.csv' // Limit resources so that this can run on GitHub Actions diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf index db45eb06..7f2ca16a 100644 --- a/workflows/assemblyqc.nf +++ b/workflows/assemblyqc.nf @@ -65,12 +65,17 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/du // Info required for completion email and summary def assemblyqc_report = [] +def input_assembly_sheet_fields = 5 +def synteny_xref_assemblies_fields = 3 workflow ASSEMBLYQC { // Input channels ch_versions = Channel.empty() ch_input = Channel.fromSamplesheet('input') + | collect + | flatMap { WorkflowAssemblyqc.validateInput(it) } + | buffer(size: input_assembly_sheet_fields) ch_target_assemby_branch = ch_input | map { tag, fasta, gff, mono_ids, labels -> @@ -126,6 +131,9 @@ workflow ASSEMBLYQC { ch_xref_assembly = params.synteny_skip || ! params.synteny_xref_assemblies ? Channel.empty() : Channel.fromSamplesheet('synteny_xref_assemblies') + | collect + | flatMap { WorkflowAssemblyqc.validateXrefAssemblies(it) } + | buffer(size: synteny_xref_assemblies_fields) | map { tag, fa, labels -> [ tag, file(fa, checkIfExists: true), file(labels, checkIfExists: true) ] }