-
Notifications
You must be signed in to change notification settings - Fork 0
/
dawidSkene.lib.R
83 lines (62 loc) · 1.87 KB
/
dawidSkene.lib.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
require("dplyr")
dataGen <- function(fileAdd = "/Users/darkGene/workspace/repo/dawidSkene/anesthetic.csv"){
dataStr <- read.table(fileAdd)
data.df <- dataStr %>% arrange(V1) %>% select(-V1)
obs1 <- matrix(unlist(lapply(data.df$V2,
FUN = function(x){ return(as.numeric(unlist(strsplit(as.character(x), "")))) })), ncol = 3, byrow = T)
dataMat <- array(data = NA, dim = c(45,5,3))
dataMat[,,1] <- as.matrix(data.df)
dataMat[,1,] <- obs1
return(dataMat)
}
dawidSkeneComp <- function(FX){
I = dim(FX)[1]
J = length(na.omit(unique(as.vector(FX))))
K = dim(FX)[2]
N <- array(NA, dim = c(I, J, K)) #
for (i in 1:I){
for (j in 1:J){
for (k in 1:K){
if (length(dim(FX)) == 2){
N[i,j,k] <- sum(FX[i,k]== j, na.rm = T)
}else{
N[i,j,k] <- sum(FX[i,k,]== j, na.rm = T)
}
}
}
}
### Step i: Initial Estimates
Tinit <- matrix(0, nrow = I, ncol = J)
for (i in 1:I){
for (j in 1:J){
Tinit[i,j] <- sum(N[i,j,])/sum(N[i,,])
}
}
T_ij <- Tinit
for (iter in 1:20){
### Step ii: Estimate PI_ij and p_j #checked
PI <- array(data = NA, dim = c(J,J,K))
for (k in 1:K){
for (j in 1:J){
for (l in 1:J){
PI[j,l,k] <- T_ij[,j] %*% N[,l,k] / sum(T_ij[,j] %*% N[,,k])
}
}
}
P <- array(data = NA, dim = J) #Check
P <- colSums(T_ij)/I #Check
### Step iii: estimating new true labels
TijNew <- array(data = 1, dim = c(I,J))
for (i in 1:I){
for (k in 1:K){
for (l in 1:J){
TijNew[i,] <- TijNew[i,] * PI[,l,k] ^ N[i,l,k]
}
}
TijNew[i,] <- (TijNew[i,] * P)/sum(TijNew[i,] * P)
}
T_ij <- TijNew
}
binaryPred <- as.integer(T_ij[,2] > .5)
return(list("T"= T_ij, "Pi"= PI, "P" = P, "N" = N, "class" = binaryPred))
}