forked from ciernialab/Alder-ChIPseq-Tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Convert_DEpeaks_to_bed.sh
110 lines (68 loc) · 4.6 KB
/
Convert_DEpeaks_to_bed.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/bin/bash
#
#SBATCH -c 4
#SBATCH --mem-per-cpu=4000
#SBATCH --job-name=Pos2Bed_DEpeaks
#SBATCH --output=Pos2Bed_DEpeaks.out
#SBATCH --time=4:00:00
##########################################################################################
#EXPLANATION ON AWK, IF INTERESTED
########################################################################################
# awk is a unix tool for text processing. It parses files (like most unix tools) on a per line basis,
# which can makes it very useful.
# We can use the following command (which prints the number of columns/fields for each row/line, and only prints
# out the unique values so we don't get the output for all rows):
# awk '{print NF}' Wendlen.H3K27ac_diffpeaksOutput.txt | uniq
# to see that there are 2 unique column values. The header has 147 columns, and the rest of the file has 19.
# Since the field we want to filter by (the adjusted p-values) is the last column,
# this means the field we want to filter by is actually the 19th column
# Now, the NF variable (which means Number of Fields) can also be used to denote the last field.
# So there's a few ways we can sort. One way is to use NF:
# awk '{if($NF<0.05) {print}}' ---> this runs a conditional based on the last field FOR EACH row, so the
# filtering will still work, since the field we want to filter (adjusted p values) is the last one
# we could also do awk '{if($19<=0.05) {print}}', which runs a conditional based on the 19th column.
# I chose this since it's more explicit. So the code is:
# awk '{if($19<=0.05) {print}}' DEpeaks/Wendlen.H3K27ac_diffpeaksOutput.txt > DEpeaks/FDRfiltered.Wendlen.H3K27ac_diffpeaksOutput.txt
# this says, using awk, for each line, if the value in the 19th column is <= 0.05, print the line
# and output the results into a new file titled "FDRfiltered.Wendlen.H3K27ac_diffpeaksOutput.txt"
##########################################################################################
#sort txt files by adjusted p value, filter for <0.05, and output to new file
########################################################################################
#H3K27ac Wendlen
awk '{if($19<=0.05) {print}}' DEpeaks/Wendlen.H3K27ac_diffpeaksOutput.txt > DEpeaks/FDRfiltered.Wendlen.H3K27ac_diffpeaksOutput.txt
#H3K27ac Gosselin
awk '{if($19<=0.05) {print}}' DEpeaks/Gosselin.H3K27ac_diffpeaksOutput.txt > DEpeaks/FDRfiltered.Gosselin.H3K27ac_diffpeaksOutput.txt
#H3K9ac Wendlen
awk '{if($19<=0.05) {print}}' DEpeaks/Wendlen.H3K9ac_diffpeaksOutput.txt > DEpeaks/FDRfiltered.Wendlen.H3K9ac_diffpeaksOutput.txt
#H3K9ac Gosselin
awk '{if($19<=0.05) {print}}' DEpeaks/Gosselin.H3K9ac_diffpeaksOutput.txt > DEpeaks/FDRfiltered.Gosselin.H3K9ac_diffpeaksOutput.txt
##########################################################################################
#Convert HOMER peak files to bed format for UCSC genome browser
########################################################################################
#H3K27ac Wendlen
#######################################################################################
grep -v '^#' DEpeaks/FDRfiltered.Wendlen.H3K27ac_diffpeaksOutput.txt > DEpeaks/tmp.txt
perl pos2bedmod.pl DEpeaks/tmp.txt > DEpeaks/tmp.bed
sed 's/^/chr/' DEpeaks/tmp.bed > DEpeaks/FDRfiltered.Wendlen.H3K27ac_diffpeaksOutput.bed
rm DEpeaks/tmp*
########################################################################################
#H3K27ac Gosselin
#######################################################################################
grep -v '^#' DEpeaks/FDRfiltered.Gosselin.H3K27ac_diffpeaksOutput.txt > DEpeaks/tmp.txt
perl pos2bedmod.pl DEpeaks/tmp.txt > DEpeaks/tmp.bed
sed 's/^/chr/' DEpeaks/tmp.bed > DEpeaks/FDRfiltered.Gosselin.H3K27ac_diffpeaksOutput.bed
rm DEpeaks/tmp*
########################################################################################
#H3K9ac Wendlen
#######################################################################################
grep -v '^#' DEpeaks/FDRfiltered.Wendlen.H3K9ac_diffpeaksOutput.txt > DEpeaks/tmp.txt
perl pos2bedmod.pl DEpeaks/tmp.txt > DEpeaks/tmp.bed
sed 's/^/chr/' DEpeaks/tmp.bed > DEpeaks/FDRfiltered.Wendlen.H3K9ac_diffpeaksOutput.bed
rm DEpeaks/tmp*
########################################################################################
#H3K9ac Gosselin
#######################################################################################
grep -v '^#' DEpeaks/FDRfiltered.Gosselin.H3K9ac_diffpeaksOutput.txt > DEpeaks/tmp.txt
perl pos2bedmod.pl DEpeaks/tmp.txt > DEpeaks/tmp.bed
sed 's/^/chr/' DEpeaks/tmp.bed > DEpeaks/FDRfiltered.Gosselin.H3K9ac_diffpeaksOutput.bed
rm DEpeaks/tmp*