-
Notifications
You must be signed in to change notification settings - Fork 0
/
getNonBiasedHighest_Biased_AllelicBiaseDistribution.R
168 lines (142 loc) · 6.04 KB
/
getNonBiasedHighest_Biased_AllelicBiaseDistribution.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# function to merge multiple files based on common columns
multmerge = function(mypath, mypattern){
filenames=list.files(path=mypath, pattern = mypattern)
datalist = lapply(filenames, function(x){read.table(x, head=T)})
Reduce(function(x,y) {merge(x,y)}, datalist)}
## get binomial test p-value
load("../data-rpkms-001.RData")
pValue= multmerge(".", mypattern = glob2rx("*_pValue.txt"))
organs <- c("BN", "HT", "SK", "SP", "KD", "LV", "GI" ,"ST")
rpkm_s=rpkm[,grep("chr",colnames(rpkm))]
rpkm_s$BiasedOrgan <- rpkm$BiasedOrgan
rpkm_s$NonBH <- rpkm$NonBH
pValue=merge.data.frame(pValue, rpkm_s)
pValue$B.p=1 # the organ specific domain in Biased organ)
pValue$H.p=1 # the non-biased organ with highest expression level
for (i in 1:dim(pValue)[1]){
pValue$B.p[i]=pValue[i,grep(pValue$BiasedOrgan[i],colnames(pValue))] #find the p value of biased organ
pValue$H.p[i]=pValue[i,grep(pValue$NonBH[i],colnames(pValue))]#find the p value of non-biased organ with highest expression level
}
# make QQ plots
pdf("qqplot_pValue_Pooled_reads_1.pdf")
qqplot(-log(seq(0, 1, 1/5000),10), -log(pValue$H.p,10), col="red")
abline(0,1)
dev.off()
pdf("hist_pValue_Pooled_reads_1.pdf")
hist(pValue$H.p, breaks = seq(0,1,1/100))
dev.off()
## get allele-specific reads
AS.Read= multmerge(".", mypattern = glob2rx("*_AlleleSpecificReads.txt"))
# get the Biased organ and the non-biased organ with highest expression level
AS.Read= merge(AS.Read, rpkm_s)
# identify the allelic bias states of the organ-specific blocks in the Biased organ (M or P)
for (i in 1:dim(AS.Read)[1]){
AS.Read$B.win[i] <- AS.Read[i,grep(paste(AS.Read$BiasedOrgan[i],"win",sep = "."),colnames(AS.Read))]
}
AS.Read$B.win[AS.Read$B.win==1]="M"
AS.Read$B.win[AS.Read$B.win==2]="P"
# identify mat and pat reads of the non-biased highest organ and biased organ
for (i in 1:dim(AS.Read)[1]){
AS.Read$B.mat[i] <- AS.Read[i,grep(paste(AS.Read$BiasedOrgan[i],"mat",sep = "."),colnames(AS.Read))]
AS.Read$B.pat[i] <- AS.Read[i,grep(paste(AS.Read$BiasedOrgan[i],"pat",sep = "."),colnames(AS.Read))]
AS.Read$H.mat[i] <- AS.Read[i,grep(paste(AS.Read$NonBH[i],"mat",sep = "."),colnames(AS.Read))]
AS.Read$H.pat[i] <- AS.Read[i,grep(paste(AS.Read$NonBH[i],"pat",sep = "."),colnames(AS.Read))]
}
# calulate effect size (mat/pat or pat/mat depends on m |p of the biased organ)
# mat/pat if the biased organ is M, otherwise pat/mat
for (i in 1:dim(AS.Read)[1]){
if (AS.Read$B.win[i] == "M"){
AS.Read$H.effectSize[i] = AS.Read$H.mat[i]/AS.Read$H.pat[i]
AS.Read$B.effectSize[i] = AS.Read$B.mat[i]/AS.Read$B.pat[i]
}
else{
AS.Read$H.effectSize[i] = AS.Read$H.pat[i]/AS.Read$H.mat[i]
AS.Read$B.effectSize[i] = AS.Read$B.pat[i]/AS.Read$B.mat[i]
}
}
AS.Read$H.matReads.TotalRatio = AS.Read$H.mat/(AS.Read$H.pat+AS.Read$H.mat)
AS.Read$B.matReads.TotalRatio = AS.Read$B.mat/(AS.Read$B.pat+AS.Read$B.mat)
#boxplot(log2(AS.Read$H.effectSize), log2(AS.Read$B.effectSize))
#abline(h=0)
# exmaine the effect size.
pdf("AllelicBias_effectSize_NonBiaseH_Biased.pdf")
par(mar=c(6.1, 7.1, 2.1, 2.1)) #d l u r 5.1, 4.1, 4.1, 2.1
par(mgp=c(3,1,0))
par(cex.lab=2.2, cex.axis=2.2)
hist(log2(AS.Read$H.effectSize), col = "blue", breaks = seq(-10,20,1/10), xlim = c(-10,10), xlab="Effect Size (log2)", main="")
hist(log2(AS.Read$B.effectSize), xlim = c(-10,10), col="red", density=25, add=T, breaks = seq(-10,20,1/10))
abline(v=0, col="yellow", lty=3, lwd=2)
legend("topleft",
legend = c( "Biased","NonBiased"),
#pch=c(15,15),
cex=2,
lty=c(0,0),
#bty="n",
lwd=1.5,
density=c(25, 10000),
angle=c(45, 180),
#angle=45,
fill=c("red","blue")
, bty = "n"
)
dev.off()
pdf("AllelicBias_matRatio_NonBiaseH_Biased.pdf")
par(mar=c(6.1, 7.1, 2.1, 2.1)) #d l u r 5.1, 4.1, 4.1, 2.1
par(mgp=c(3,1,0))
par(cex.lab=2.2, cex.axis=2.2)
hist(AS.Read$H.matReads.TotalRatio, breaks = seq(0,1,1/30),col="blue", xlab="mat read ratio", main="")
hist(AS.Read$B.matReads.TotalRatio, breaks = seq(0,1,1/30), col = "red", density = 25, add=T)
legend("topright",
legend = c( "Biased","NonBiased"),
#pch=c(15,15),
cex=2,
lty=c(0,0),
#bty="n",
lwd=1.5,
density=c(25, 10000),
angle=c(45, 180),
#angle=45,
fill=c("red","blue")
, bty = "n"
)
dev.off()
### not
### to estimate the parameters of beta-binomial ###
library('VGAM')
fit.B.ab <- vglm(formula = cbind(AS.Read$B.mat,AS.Read$B.pat) ~ 1, family = "betabinomialff")
Coef(fit.B.ab)
B.shape1 = Coef(fit.B.ab)[1]
B.shape2 = Coef(fit.B.ab)[2]
fit.B.rho <- vglm(formula = cbind(AS.Read$B.mat,AS.Read$B.pat) ~ 1, family = "betabinomial")
Coef(fit.B.rho)
#mu rho
#0.5057735 0.3439821
fit.H.ab <- vglm(formula = cbind(AS.Read$H.mat,AS.Read$H.pat) ~ 1, family = "betabinomialff")
Coef(fit.H.ab)
#shape1 shape2
#7.230871 7.024651
H.shape1 = Coef(fit.H.ab)[1]
H.shape2 = Coef(fit.H.ab)[2]
fit.H.rho <- vglm(formula = cbind(AS.Read$H.mat,AS.Read$H.pat) ~ 1, family = "betabinomial")
Coef(fit.H.rho)
#mu rho
#0.50723306 0.0655451
all.mat <- NULL
all.pat <- NULL
organs <- c("BN", "HT", "SK", "SP", "KD", "LV", "GI" ,"ST")
for (o in organs){
m = AS.Read[,grep(paste(o, "mat", sep = "."), colnames(AS.Read))]
p = AS.Read[,grep(paste(o, "pat", sep = "."), colnames(AS.Read))]
all.mat = c(all.mat, m)
all.pat = c(all.pat, p)
}
fit.all.rho <- vglm(formula = cbind(all.mat, all.pat) ~ 1, family = "betabinomial")
mu = Coef(fit.all.rho)[1]
rho = Coef(fit.all.rho)[2]
hist(pbetabinom (AS.Read$H.mat, AS.Read$H.pat+AS.Read$H.mat, mu, rho))
hist(pbetabinom (AS.Read$B.mat, AS.Read$B.pat+AS.Read$B.mat, mu, rho))
hist(pbetabinom.ab (apply(cbind(AS.Read$B.mat,AS.Read$B.pat), 1, min),AS.Read$B.pat+AS.Read$B.mat, H.shape1, H.shape2))
hist(pbetabinom.ab (AS.Read$H.mat,AS.Read$H.pat+AS.Read$H.mat, H.shape1, H.shape2))
hist(pbetabinom (AS.Read$H.mat, AS.Read$H.pat+AS.Read$H.mat, 0.5, 0.07))
hist(pbetabinom (apply(cbind(AS.Read$H.mat,AS.Read$H.pat), 1, min),AS.Read$H.pat+AS.Read$B.mat, 0.5))
hist(pbetabinom (apply(cbind(AS.Read$B.mat,AS.Read$B.pat), 1, min),AS.Read$B.pat+AS.Read$B.mat, 0.5))