From d0aead9a8e89a7e16e7a72433233a0d4a0ef9f1d Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Sat, 30 Mar 2024 12:21:13 -0500 Subject: [PATCH] fix(dreg): Move all awk scripts to bin/ Also fixes a counting error --- .../local/dreg_prep/scripts => bin}/pe_1.awk | 1 + .../local/dreg_prep/scripts => bin}/pe_2.awk | 1 + .../local/dreg_prep/scripts => bin}/pe_3.awk | 1 + .../scripts/pe_6.awk => bin/pe_4.awk | 9 +- .../local/dreg_prep/scripts => bin}/pe_5.awk | 8 +- .../scripts/pe_7.awk => bin/pe_6.awk | 2 + .../scripts/pe_8.awk => bin/pe_7.awk | 1 + .../scripts/pe_9.awk => bin/pe_8.awk | 1 + .../local/dreg_prep/scripts => bin}/se_1.awk | 1 + .../local/dreg_prep/scripts => bin}/se_2.awk | 2 + .../local/dreg_prep/scripts => bin}/se_3.awk | 2 + .../local/dreg_prep/scripts => bin}/se_4.awk | 1 + modules/local/dreg_prep/templates/proseq2.0 | 26 ++-- modules/local/dreg_prep/tests/main.nf.test | 6 +- .../local/dreg_prep/tests/main.nf.test.snap | 132 +++++++++++++++--- 15 files changed, 152 insertions(+), 42 deletions(-) rename {modules/local/dreg_prep/scripts => bin}/pe_1.awk (89%) mode change 100644 => 100755 rename {modules/local/dreg_prep/scripts => bin}/pe_2.awk (90%) mode change 100644 => 100755 rename {modules/local/dreg_prep/scripts => bin}/pe_3.awk (90%) mode change 100644 => 100755 rename modules/local/dreg_prep/scripts/pe_6.awk => bin/pe_4.awk (90%) mode change 100644 => 100755 rename {modules/local/dreg_prep/scripts => bin}/pe_5.awk (89%) mode change 100644 => 100755 rename modules/local/dreg_prep/scripts/pe_7.awk => bin/pe_6.awk (89%) mode change 100644 => 100755 rename modules/local/dreg_prep/scripts/pe_8.awk => bin/pe_7.awk (90%) mode change 100644 => 100755 rename modules/local/dreg_prep/scripts/pe_9.awk => bin/pe_8.awk (89%) mode change 100644 => 100755 rename {modules/local/dreg_prep/scripts => bin}/se_1.awk (91%) mode change 100644 => 100755 rename {modules/local/dreg_prep/scripts => bin}/se_2.awk (90%) mode change 100644 => 100755 rename {modules/local/dreg_prep/scripts => bin}/se_3.awk (78%) mode change 100644 => 100755 rename {modules/local/dreg_prep/scripts => bin}/se_4.awk (84%) mode change 100644 => 100755 diff --git a/modules/local/dreg_prep/scripts/pe_1.awk b/bin/pe_1.awk old mode 100644 new mode 100755 similarity index 89% rename from modules/local/dreg_prep/scripts/pe_1.awk rename to bin/pe_1.awk index 97dc1612..2e99042f --- a/modules/local/dreg_prep/scripts/pe_1.awk +++ b/bin/pe_1.awk @@ -1,3 +1,4 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } diff --git a/modules/local/dreg_prep/scripts/pe_2.awk b/bin/pe_2.awk old mode 100644 new mode 100755 similarity index 90% rename from modules/local/dreg_prep/scripts/pe_2.awk rename to bin/pe_2.awk index fd965dbe..a7877105 --- a/modules/local/dreg_prep/scripts/pe_2.awk +++ b/bin/pe_2.awk @@ -1,3 +1,4 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } diff --git a/modules/local/dreg_prep/scripts/pe_3.awk b/bin/pe_3.awk old mode 100644 new mode 100755 similarity index 90% rename from modules/local/dreg_prep/scripts/pe_3.awk rename to bin/pe_3.awk index f7a3fa4e..8ebf797d --- a/modules/local/dreg_prep/scripts/pe_3.awk +++ b/bin/pe_3.awk @@ -1,3 +1,4 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } diff --git a/modules/local/dreg_prep/scripts/pe_6.awk b/bin/pe_4.awk old mode 100644 new mode 100755 similarity index 90% rename from modules/local/dreg_prep/scripts/pe_6.awk rename to bin/pe_4.awk index bd492463..d48697e4 --- a/modules/local/dreg_prep/scripts/pe_6.awk +++ b/bin/pe_4.awk @@ -1,11 +1,12 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } -($10 == "+") { - print $1, $5, $5 + 1, $7, $8, $10 -} - ($10 == "-") { print $1, $6 - 1, $6, $7, $8, $10 } + +($10 == "+") { + print $1, $5, $5 + 1, $7, $8, $10 +} diff --git a/modules/local/dreg_prep/scripts/pe_5.awk b/bin/pe_5.awk old mode 100644 new mode 100755 similarity index 89% rename from modules/local/dreg_prep/scripts/pe_5.awk rename to bin/pe_5.awk index f0d01a43..0e665204 --- a/modules/local/dreg_prep/scripts/pe_5.awk +++ b/bin/pe_5.awk @@ -1,11 +1,13 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } +($10 == "+") { + print $1, $5, $5 + 1, $7, $8, $10 +} + ($10 == "-") { print $1, $6 - 1, $6, $7, $8, $10 } -($10 == "+") { - print $1, $5, $5 + 1, $7, $8, $10 -} diff --git a/modules/local/dreg_prep/scripts/pe_7.awk b/bin/pe_6.awk old mode 100644 new mode 100755 similarity index 89% rename from modules/local/dreg_prep/scripts/pe_7.awk rename to bin/pe_6.awk index f7a3fa4e..f8c5f03d --- a/modules/local/dreg_prep/scripts/pe_7.awk +++ b/bin/pe_6.awk @@ -1,3 +1,4 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } @@ -9,3 +10,4 @@ BEGIN { ($9 == "-") { print $1, $3 - 1, $3, $7, $8, $10 } + diff --git a/modules/local/dreg_prep/scripts/pe_8.awk b/bin/pe_7.awk old mode 100644 new mode 100755 similarity index 90% rename from modules/local/dreg_prep/scripts/pe_8.awk rename to bin/pe_7.awk index cd31e86b..1c563bf5 --- a/modules/local/dreg_prep/scripts/pe_8.awk +++ b/bin/pe_7.awk @@ -1,3 +1,4 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } diff --git a/modules/local/dreg_prep/scripts/pe_9.awk b/bin/pe_8.awk old mode 100644 new mode 100755 similarity index 89% rename from modules/local/dreg_prep/scripts/pe_9.awk rename to bin/pe_8.awk index 97dc1612..2e99042f --- a/modules/local/dreg_prep/scripts/pe_9.awk +++ b/bin/pe_8.awk @@ -1,3 +1,4 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } diff --git a/modules/local/dreg_prep/scripts/se_1.awk b/bin/se_1.awk old mode 100644 new mode 100755 similarity index 91% rename from modules/local/dreg_prep/scripts/se_1.awk rename to bin/se_1.awk index 9840d562..e66d6493 --- a/modules/local/dreg_prep/scripts/se_1.awk +++ b/bin/se_1.awk @@ -1,3 +1,4 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } diff --git a/modules/local/dreg_prep/scripts/se_2.awk b/bin/se_2.awk old mode 100644 new mode 100755 similarity index 90% rename from modules/local/dreg_prep/scripts/se_2.awk rename to bin/se_2.awk index dffc0f17..2146f5e0 --- a/modules/local/dreg_prep/scripts/se_2.awk +++ b/bin/se_2.awk @@ -1,3 +1,4 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } @@ -13,3 +14,4 @@ BEGIN { ($6 == "-") { print $1, $3 - 1, $3, $4, $5, "+" } + diff --git a/modules/local/dreg_prep/scripts/se_3.awk b/bin/se_3.awk old mode 100644 new mode 100755 similarity index 78% rename from modules/local/dreg_prep/scripts/se_3.awk rename to bin/se_3.awk index 042c6b54..8d8ae15b --- a/modules/local/dreg_prep/scripts/se_3.awk +++ b/bin/se_3.awk @@ -1,3 +1,4 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } @@ -5,3 +6,4 @@ BEGIN { { print $1, $2, $3, -1 * $4 } + diff --git a/modules/local/dreg_prep/scripts/se_4.awk b/bin/se_4.awk old mode 100644 new mode 100755 similarity index 84% rename from modules/local/dreg_prep/scripts/se_4.awk rename to bin/se_4.awk index 69e6ac26..84682d9f --- a/modules/local/dreg_prep/scripts/se_4.awk +++ b/bin/se_4.awk @@ -1,3 +1,4 @@ +#!/bin/awk -f BEGIN { OFS = "\t" } diff --git a/modules/local/dreg_prep/templates/proseq2.0 b/modules/local/dreg_prep/templates/proseq2.0 index a35213f4..3ca2f0bf 100755 --- a/modules/local/dreg_prep/templates/proseq2.0 +++ b/modules/local/dreg_prep/templates/proseq2.0 @@ -19,10 +19,10 @@ echo "Writing bigWigs:" if [[ "$meta.single_end" == "true" ]]; then echo "SE" if [[ "$assay_type" == "GROseq" ]]; then - bedtools bamtobed -i $bam_file | awk -f ${moduleDir}/scripts/se_1.awk | gzip >${prefix}.bed.gz + bedtools bamtobed -i $bam_file | se_1.awk | gzip >${prefix}.bed.gz #elif [[ "\${RNA3}" == "R1_5prime" && "\${OPP}" == "TRUE" ]] ; then #like PRO-seq elif [[ "$assay_type" == "PROseq" ]]; then - bedtools bamtobed -i $bam_file | awk -f ${moduleDir}/scripts/se_2.awk | gzip >${prefix}.bed.gz + bedtools bamtobed -i $bam_file | se_2.awk | gzip >${prefix}.bed.gz fi else echo "PE" @@ -31,17 +31,17 @@ else if true; then # HACK Map5 not supported for now if true; then ## report The 5' end of the RNA. Danko lab leChRO-Seq protocol is on the 5' of _R1 readl, same strand of R1 (\$9) - bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_1.awk | gzip >${prefix}.bed.gz + bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_1.awk | gzip >${prefix}.bed.gz else ## report The 3' end of the RNA. Danko lab leChRO-Seq protocol is on the 5 prime of _R2 read, opposite strand of R2 (R2 strand \$10, R1 strand \$9) - bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_2.awk | gzip >${prefix}.bed.gz + bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_2.awk | gzip >${prefix}.bed.gz fi # HACK Opposite-strand not supported for now elif false; then # HACK Map5 not supported for now if true; then ## report The 5' end of the RNA. - bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_4.awk | gzip >${prefix}.bed.gz + bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_3.awk | gzip >${prefix}.bed.gz else ## report The 3' end of the RNA. - bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_5.awk | gzip >${prefix}.bed.gz + bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_4.awk | gzip >${prefix}.bed.gz fi fi elif [ "$assay_type" == "R2_5" ]; then @@ -49,17 +49,17 @@ else if true; then # HACK Map5 not supported for now if true; then #report the 5 prime end of RNA, in Engreitz data is 5 prime end of R2, same strand - bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_6.awk | gzip >${prefix}.bed.gz + bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_5.awk | gzip >${prefix}.bed.gz else ## report the 3-prime end of the RNA, in Engreitz data is the 5' end of R1 read, but opposite strand - bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_7.awk | gzip >${prefix}.bed.gz + bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_6.awk | gzip >${prefix}.bed.gz fi # HACK Opposite-strand not supported for now elif false; then # HACK Map5 not supported for now if true; then #report the 5 prime end of RNA, in Engreitz data is 5 prime end of R2, same strand - bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_8.awk | gzip >${prefix}.bed.gz + bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_7.awk | gzip >${prefix}.bed.gz else ## report the 3-prime end of the RNA, in Engreitz data is the 5' end of R1 read, but opposite strand - bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_8.awk | gzip >${prefix}.bed.gz + bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_8.awk | gzip >${prefix}.bed.gz fi fi fi @@ -79,11 +79,11 @@ bedtools genomecov -bg -i ${prefix}.nr.rs.bed.gz -g ${sizes} -strand + >${prefix bedtools genomecov -bg -i ${prefix}.nr.rs.bed.gz -g ${sizes} -strand - >${prefix}_minus.noinv.bedGraph ## Invert minus strand. -cat ${prefix}_minus.noinv.bedGraph | awk -f ${moduleDir}/scripts/se_3.awk >${prefix}_minus.bedGraph ## Invert read counts on the minus strand. +cat ${prefix}_minus.noinv.bedGraph | se_3.awk >${prefix}_minus.bedGraph ## Invert read counts on the minus strand. ## normalized by RPM -cat ${prefix}_plus.bedGraph | awk -f ${moduleDir}/scripts/se_4.awk -v readCount="\$readCount" >${prefix}_plus.rpm.bedGraph -cat ${prefix}_minus.bedGraph | awk -f ${moduleDir}/scripts/se_4.awk -v readCount="\$readCount" >${prefix}_minus.rpm.bedGraph +cat ${prefix}_plus.bedGraph | se_4.awk -v readCount="\$readCount" >${prefix}_plus.rpm.bedGraph +cat ${prefix}_minus.bedGraph | se_4.awk -v readCount="\$readCount" >${prefix}_minus.rpm.bedGraph ## Then to bigWig (nomalized and non-nomrmalized ones) bedGraphToBigWig ${prefix}_plus.rpm.bedGraph ${sizes} ${prefix}_plus.rpm.bw bedGraphToBigWig ${prefix}_minus.rpm.bedGraph ${sizes} ${prefix}_minus.rpm.bw diff --git a/modules/local/dreg_prep/tests/main.nf.test b/modules/local/dreg_prep/tests/main.nf.test index fe0e2f26..eaa93b4e 100644 --- a/modules/local/dreg_prep/tests/main.nf.test +++ b/modules/local/dreg_prep/tests/main.nf.test @@ -26,8 +26,7 @@ nextflow_process { then { assertAll( - // TODO - { assert process.failed }, + { assert process.success }, { assert snapshot(process.out).match("paired-end") } ) } @@ -53,8 +52,7 @@ nextflow_process { then { assertAll( - // FIXME - { assert process.failed }, + { assert process.success }, { assert snapshot(process.out).match("single-end") } ) } diff --git a/modules/local/dreg_prep/tests/main.nf.test.snap b/modules/local/dreg_prep/tests/main.nf.test.snap index fae30cce..28a96876 100644 --- a/modules/local/dreg_prep/tests/main.nf.test.snap +++ b/modules/local/dreg_prep/tests/main.nf.test.snap @@ -3,28 +3,76 @@ "content": [ { "0": [ - + [ + { + "id": "test", + "single_end": true + }, + "test.dreg_plus.rpm.bw:md5,c723fc5bdb582655ea1aa1f324cb71b4" + ] ], "1": [ - + [ + { + "id": "test", + "single_end": true + }, + "test.dreg_minus.rpm.bw:md5,2c78d94d8abc4a2bad792fd597d434be" + ] ], "2": [ - + [ + { + "id": "test", + "single_end": true + }, + "test.dreg_plus.bw:md5,fd77f4d72b9f77ed71166d21de1b48bd" + ] ], "3": [ - + [ + { + "id": "test", + "single_end": true + }, + "test.dreg_minus.bw:md5,92c7a60a606a1668a388c3f45d8f07f0" + ] ], "minus_bigwig": [ - + [ + { + "id": "test", + "single_end": true + }, + "test.dreg_minus.bw:md5,92c7a60a606a1668a388c3f45d8f07f0" + ] ], "minus_rpm_bigwig": [ - + [ + { + "id": "test", + "single_end": true + }, + "test.dreg_minus.rpm.bw:md5,2c78d94d8abc4a2bad792fd597d434be" + ] ], "plus_bigwig": [ - + [ + { + "id": "test", + "single_end": true + }, + "test.dreg_plus.bw:md5,fd77f4d72b9f77ed71166d21de1b48bd" + ] ], "plus_rpm_bigwig": [ - + [ + { + "id": "test", + "single_end": true + }, + "test.dreg_plus.rpm.bw:md5,c723fc5bdb582655ea1aa1f324cb71b4" + ] ] } ], @@ -32,34 +80,82 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-30T14:10:02.23320779" + "timestamp": "2024-03-30T14:19:05.327654675" }, "paired-end": { "content": [ { "0": [ - + [ + { + "id": "test", + "single_end": false + }, + "test.dreg_plus.rpm.bw:md5,289652b9b07dec1d0c402e0de9001d9b" + ] ], "1": [ - + [ + { + "id": "test", + "single_end": false + }, + "test.dreg_minus.rpm.bw:md5,5c7c70cecb2ff37236dffa8cd6e64e8f" + ] ], "2": [ - + [ + { + "id": "test", + "single_end": false + }, + "test.dreg_plus.bw:md5,7098afd4d0a3927a97bca0f390318ef0" + ] ], "3": [ - + [ + { + "id": "test", + "single_end": false + }, + "test.dreg_minus.bw:md5,6fc8a8d47c17b9c3e9fa00302cf4b388" + ] ], "minus_bigwig": [ - + [ + { + "id": "test", + "single_end": false + }, + "test.dreg_minus.bw:md5,6fc8a8d47c17b9c3e9fa00302cf4b388" + ] ], "minus_rpm_bigwig": [ - + [ + { + "id": "test", + "single_end": false + }, + "test.dreg_minus.rpm.bw:md5,5c7c70cecb2ff37236dffa8cd6e64e8f" + ] ], "plus_bigwig": [ - + [ + { + "id": "test", + "single_end": false + }, + "test.dreg_plus.bw:md5,7098afd4d0a3927a97bca0f390318ef0" + ] ], "plus_rpm_bigwig": [ - + [ + { + "id": "test", + "single_end": false + }, + "test.dreg_plus.rpm.bw:md5,289652b9b07dec1d0c402e0de9001d9b" + ] ] } ], @@ -67,6 +163,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-30T14:09:52.151543045" + "timestamp": "2024-03-30T14:18:55.16822952" } } \ No newline at end of file