-
Notifications
You must be signed in to change notification settings - Fork 0
/
getBreakpoints.sh
executable file
·57 lines (41 loc) · 1.13 KB
/
getBreakpoints.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/bin/bash
# vcf calls from indel callers
vcf="$1"
# where to write output of extracted breakpoints
output="$2"
egrep -v ^# "$vcf" | awk -F'\t' '{
OFS = FS
chrom = $1
pos = $2
ref = $4
alt = $5
if (ref ~ /,/ || alt ~ /,/) {
# skip multiallelic records
next
}
if (alt == "<DEL_L>" || alt == "<DEL_R>" || alt == "<INS>") {
# already a breakpoint
# matches Scotch (except for 1-bp dels), Pindel insertions
print chrom, pos, alt, "NA"
} else if (alt == "<DEL>") {
# Pindel deletion without allele
# get endpoint from END tag in INFO
info = $8
split(info, infoItems, ";")
endTag = infoItems[1]
split(endTag, endValues, "=")
end = endValues[2]
delLength = end - pos
print chrom, pos, "<DEL_L>", delLength
print chrom, pos + delLength, "<DEL_R>", delLength
} else if (length(ref) > length(alt)) {
# deletion
delLength = length(ref) - length(alt)
print chrom, pos, "<DEL_L>", delLength
print chrom, pos + delLength, "<DEL_R>", delLength
} else if (length(ref) < length(alt)) {
# insertion
insLength = length(alt) - length(ref)
print chrom, pos, "<INS>", insLength
}
}' > "$output"