diff --git a/CHANGELOG.md b/CHANGELOG.md index e14914386..0a90f9db6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [1653](https://github.com/nf-core/sarek/pull/1653) - Updates `sarek_subway` files with `lofreq` - [1660](https://github.com/nf-core/sarek/pull/1642) - Add `--length_required` for minimal reads length with `FASTP` - [1663](https://github.com/nf-core/sarek/pull/1663) - Massive conda modules update +- [1664](https://github.com/nf-core/sarek/pull/1664) - Check that the flowcell ID matches between both FASTQ files of a read pair ### Changed diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index f60bc3d93..6ece09f9a 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -944,11 +944,15 @@ workflow SAREK { // Add readgroup to meta and remove lane def addReadgroupToMeta(meta, files) { def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + def flowcell = flowcellLaneFromFastq(files[0]) + + // Check that both fastq files give the same flowcell ID (skipped when none can be read from the first file) + if ( flowcell && flowcell != flowcellLaneFromFastq(files[1]) ){ + error("Flowcell ID does not match for paired reads of sample ${meta.id} - ${files}") + } - // Here we're assuming that fastq_1 and fastq_2 are from the same flowcell: // If we cannot read the flowcell ID from the fastq file, then we don't use it - def sample_lane_id = flowcellLaneFromFastq(files[0]) ? "${meta.flowcell}.${meta.sample}.${meta.lane}" : "${meta.sample}.${meta.lane}" - // TO-DO: Would it perhaps be better to also call flowcellLaneFromFastq(files[1]) and check that we get the same flowcell-id? + def sample_lane_id = flowcell ? "${meta.flowcell}.${meta.sample}.${meta.lane}" : "${meta.sample}.${meta.lane}" // Don't use a random element for ID, it breaks resuming def read_group = "\"@RG\\tID:${sample_lane_id}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""