Skip to content

Commit

Permalink
fix(dreg): Move all awk scripts to bin/
Browse files Browse the repository at this point in the history
Also fixes a counting error
  • Loading branch information
edmundmiller committed Mar 31, 2024
1 parent bfbe484 commit d0aead9
Show file tree
Hide file tree
Showing 15 changed files with 152 additions and 42 deletions.
1 change: 1 addition & 0 deletions modules/local/dreg_prep/scripts/pe_1.awk → bin/pe_1.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}
Expand Down
1 change: 1 addition & 0 deletions modules/local/dreg_prep/scripts/pe_2.awk → bin/pe_2.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}
Expand Down
1 change: 1 addition & 0 deletions modules/local/dreg_prep/scripts/pe_3.awk → bin/pe_3.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}
Expand Down
9 changes: 5 additions & 4 deletions modules/local/dreg_prep/scripts/pe_6.awk → bin/pe_4.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}

($10 == "+") {
print $1, $5, $5 + 1, $7, $8, $10
}

($10 == "-") {
print $1, $6 - 1, $6, $7, $8, $10
}

($10 == "+") {
print $1, $5, $5 + 1, $7, $8, $10
}
8 changes: 5 additions & 3 deletions modules/local/dreg_prep/scripts/pe_5.awk → bin/pe_5.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}

($10 == "+") {
print $1, $5, $5 + 1, $7, $8, $10
}

($10 == "-") {
print $1, $6 - 1, $6, $7, $8, $10
}

($10 == "+") {
print $1, $5, $5 + 1, $7, $8, $10
}
2 changes: 2 additions & 0 deletions modules/local/dreg_prep/scripts/pe_7.awk → bin/pe_6.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}
Expand All @@ -9,3 +10,4 @@ BEGIN {
($9 == "-") {
print $1, $3 - 1, $3, $7, $8, $10
}

1 change: 1 addition & 0 deletions modules/local/dreg_prep/scripts/pe_8.awk → bin/pe_7.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}
Expand Down
1 change: 1 addition & 0 deletions modules/local/dreg_prep/scripts/pe_9.awk → bin/pe_8.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}
Expand Down
1 change: 1 addition & 0 deletions modules/local/dreg_prep/scripts/se_1.awk → bin/se_1.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}
Expand Down
2 changes: 2 additions & 0 deletions modules/local/dreg_prep/scripts/se_2.awk → bin/se_2.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}
Expand All @@ -13,3 +14,4 @@ BEGIN {
($6 == "-") {
print $1, $3 - 1, $3, $4, $5, "+"
}

2 changes: 2 additions & 0 deletions modules/local/dreg_prep/scripts/se_3.awk → bin/se_3.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/bin/awk -f
# Negate the fourth field of every input record and emit the first four
# fields tab-separated. The proseq2.0 template pipes the minus-strand
# bedGraph through this script to invert its read counts.
BEGIN {
# Join printed fields with tabs.
OFS = "\t"
}

# Runs for every record: fields 1-3 are passed through unchanged and
# field 4 is multiplied by -1.
{
print $1, $2, $3, -1 * $4
}

1 change: 1 addition & 0 deletions modules/local/dreg_prep/scripts/se_4.awk → bin/se_4.awk
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/awk -f
BEGIN {
OFS = "\t"
}
Expand Down
26 changes: 13 additions & 13 deletions modules/local/dreg_prep/templates/proseq2.0
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ echo "Writing bigWigs:"
if [[ "$meta.single_end" == "true" ]]; then
echo "SE"
if [[ "$assay_type" == "GROseq" ]]; then
bedtools bamtobed -i $bam_file | awk -f ${moduleDir}/scripts/se_1.awk | gzip >${prefix}.bed.gz
bedtools bamtobed -i $bam_file | se_1.awk | gzip >${prefix}.bed.gz
#elif [[ "\${RNA3}" == "R1_5prime" && "\${OPP}" == "TRUE" ]] ; then #like PRO-seq
elif [[ "$assay_type" == "PROseq" ]]; then
bedtools bamtobed -i $bam_file | awk -f ${moduleDir}/scripts/se_2.awk | gzip >${prefix}.bed.gz
bedtools bamtobed -i $bam_file | se_2.awk | gzip >${prefix}.bed.gz
fi
else
echo "PE"
Expand All @@ -31,35 +31,35 @@ else
if true; then
# HACK Map5 not supported for now
if true; then ## report The 5' end of the RNA. Danko lab leChRO-Seq protocol is on the 5' of _R1 read, same strand of R1 (\$9)
bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_1.awk | gzip >${prefix}.bed.gz
bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_1.awk | gzip >${prefix}.bed.gz
else ## report The 3' end of the RNA. Danko lab leChRO-Seq protocol is on the 5 prime of _R2 read, opposite strand of R2 (R2 strand \$10, R1 strand \$9)
bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_2.awk | gzip >${prefix}.bed.gz
bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_2.awk | gzip >${prefix}.bed.gz
fi
# HACK Opposite-strand not supported for now
elif false; then
# HACK Map5 not supported for now
if true; then ## report The 5' end of the RNA.
bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_4.awk | gzip >${prefix}.bed.gz
bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_3.awk | gzip >${prefix}.bed.gz
else ## report The 3' end of the RNA.
bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_5.awk | gzip >${prefix}.bed.gz
bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_4.awk | gzip >${prefix}.bed.gz
fi
fi
elif [ "$assay_type" == "R2_5" ]; then
# HACK Opposite-strand not supported for now
if true; then
# HACK Map5 not supported for now
if true; then #report the 5 prime end of RNA, in Engreitz data is 5 prime end of R2, same strand
bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_6.awk | gzip >${prefix}.bed.gz
bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_5.awk | gzip >${prefix}.bed.gz
else ## report the 3-prime end of the RNA, in Engreitz data is the 5' end of R1 read, but opposite strand
bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_7.awk | gzip >${prefix}.bed.gz
bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_6.awk | gzip >${prefix}.bed.gz
fi
# HACK Opposite-strand not supported for now
elif false; then
# HACK Map5 not supported for now
if true; then #report the 5 prime end of RNA, in Engreitz data is 5 prime end of R2, same strand
bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_8.awk | gzip >${prefix}.bed.gz
bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_7.awk | gzip >${prefix}.bed.gz
else ## report the 3-prime end of the RNA, in Engreitz data is the 5' end of R1 read, but opposite strand
bedtools bamtobed -bedpe -mate1 -i $bam_file | awk -f ${moduleDir}/scripts/pe_8.awk | gzip >${prefix}.bed.gz
bedtools bamtobed -bedpe -mate1 -i $bam_file | pe_8.awk | gzip >${prefix}.bed.gz
fi
fi
fi
Expand All @@ -79,11 +79,11 @@ bedtools genomecov -bg -i ${prefix}.nr.rs.bed.gz -g ${sizes} -strand + >${prefix
bedtools genomecov -bg -i ${prefix}.nr.rs.bed.gz -g ${sizes} -strand - >${prefix}_minus.noinv.bedGraph

## Invert minus strand.
cat ${prefix}_minus.noinv.bedGraph | awk -f ${moduleDir}/scripts/se_3.awk >${prefix}_minus.bedGraph ## Invert read counts on the minus strand.
cat ${prefix}_minus.noinv.bedGraph | se_3.awk >${prefix}_minus.bedGraph ## Invert read counts on the minus strand.

## normalized by RPM
cat ${prefix}_plus.bedGraph | awk -f ${moduleDir}/scripts/se_4.awk -v readCount="\$readCount" >${prefix}_plus.rpm.bedGraph
cat ${prefix}_minus.bedGraph | awk -f ${moduleDir}/scripts/se_4.awk -v readCount="\$readCount" >${prefix}_minus.rpm.bedGraph
cat ${prefix}_plus.bedGraph | se_4.awk -v readCount="\$readCount" >${prefix}_plus.rpm.bedGraph
cat ${prefix}_minus.bedGraph | se_4.awk -v readCount="\$readCount" >${prefix}_minus.rpm.bedGraph
## Then to bigWig (normalized and non-normalized ones)
bedGraphToBigWig ${prefix}_plus.rpm.bedGraph ${sizes} ${prefix}_plus.rpm.bw
bedGraphToBigWig ${prefix}_minus.rpm.bedGraph ${sizes} ${prefix}_minus.rpm.bw
Expand Down
6 changes: 2 additions & 4 deletions modules/local/dreg_prep/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ nextflow_process {

then {
assertAll(
// TODO
{ assert process.failed },
{ assert process.success },
{ assert snapshot(process.out).match("paired-end") }
)
}
Expand All @@ -53,8 +52,7 @@ nextflow_process {

then {
assertAll(
// FIXME
{ assert process.failed },
{ assert process.success },
{ assert snapshot(process.out).match("single-end") }
)
}
Expand Down
132 changes: 114 additions & 18 deletions modules/local/dreg_prep/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -3,70 +3,166 @@
"content": [
{
"0": [

[
{
"id": "test",
"single_end": true
},
"test.dreg_plus.rpm.bw:md5,c723fc5bdb582655ea1aa1f324cb71b4"
]
],
"1": [

[
{
"id": "test",
"single_end": true
},
"test.dreg_minus.rpm.bw:md5,2c78d94d8abc4a2bad792fd597d434be"
]
],
"2": [

[
{
"id": "test",
"single_end": true
},
"test.dreg_plus.bw:md5,fd77f4d72b9f77ed71166d21de1b48bd"
]
],
"3": [

[
{
"id": "test",
"single_end": true
},
"test.dreg_minus.bw:md5,92c7a60a606a1668a388c3f45d8f07f0"
]
],
"minus_bigwig": [

[
{
"id": "test",
"single_end": true
},
"test.dreg_minus.bw:md5,92c7a60a606a1668a388c3f45d8f07f0"
]
],
"minus_rpm_bigwig": [

[
{
"id": "test",
"single_end": true
},
"test.dreg_minus.rpm.bw:md5,2c78d94d8abc4a2bad792fd597d434be"
]
],
"plus_bigwig": [

[
{
"id": "test",
"single_end": true
},
"test.dreg_plus.bw:md5,fd77f4d72b9f77ed71166d21de1b48bd"
]
],
"plus_rpm_bigwig": [

[
{
"id": "test",
"single_end": true
},
"test.dreg_plus.rpm.bw:md5,c723fc5bdb582655ea1aa1f324cb71b4"
]
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-03-30T14:10:02.23320779"
"timestamp": "2024-03-30T14:19:05.327654675"
},
"paired-end": {
"content": [
{
"0": [

[
{
"id": "test",
"single_end": false
},
"test.dreg_plus.rpm.bw:md5,289652b9b07dec1d0c402e0de9001d9b"
]
],
"1": [

[
{
"id": "test",
"single_end": false
},
"test.dreg_minus.rpm.bw:md5,5c7c70cecb2ff37236dffa8cd6e64e8f"
]
],
"2": [

[
{
"id": "test",
"single_end": false
},
"test.dreg_plus.bw:md5,7098afd4d0a3927a97bca0f390318ef0"
]
],
"3": [

[
{
"id": "test",
"single_end": false
},
"test.dreg_minus.bw:md5,6fc8a8d47c17b9c3e9fa00302cf4b388"
]
],
"minus_bigwig": [

[
{
"id": "test",
"single_end": false
},
"test.dreg_minus.bw:md5,6fc8a8d47c17b9c3e9fa00302cf4b388"
]
],
"minus_rpm_bigwig": [

[
{
"id": "test",
"single_end": false
},
"test.dreg_minus.rpm.bw:md5,5c7c70cecb2ff37236dffa8cd6e64e8f"
]
],
"plus_bigwig": [

[
{
"id": "test",
"single_end": false
},
"test.dreg_plus.bw:md5,7098afd4d0a3927a97bca0f390318ef0"
]
],
"plus_rpm_bigwig": [

[
{
"id": "test",
"single_end": false
},
"test.dreg_plus.rpm.bw:md5,289652b9b07dec1d0c402e0de9001d9b"
]
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-03-30T14:09:52.151543045"
"timestamp": "2024-03-30T14:18:55.16822952"
}
}

0 comments on commit d0aead9

Please sign in to comment.