-
Notifications
You must be signed in to change notification settings - Fork 0
/
DataAcquisitionProcess.R
159 lines (124 loc) · 5.16 KB
/
DataAcquisitionProcess.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# Logit Normality in Data Acquisition Process in Geochemistry,
# comparing with other assumptions (ilr normality, clr normality, R normality)
# Several Parts, first is the data acquisition process algorithm, presented hereunder :
########################
# DATA ACQUISITION ALGO
#######################
library(MASS)
library(tidyverse)
library(compositions)
library(xtable)
library(rootSolve)
library(EnvStats)
library(ggpubr)
library(quadprog)
# Proportion of Oxygen in Major Oxides
frac_el <- 1/100*c(46.75, 47.87, 52.93, 69.94, 77.44, 60.31, 71.47, 74.18, 83.01, 43.64)
frac_ox <- rep(1,times=length(frac_el))-frac_el
# Let a true 22 parts composition roughly inspired from GEOPT48 :
true <- c(Si = 0.26501011, Ti = 0.00504876, Al = 0.10090108, Fe = 0.05051534,
Mn = 0.00114497, Mg = 0.00636078, Ca = 0.02521995, Na = 0.04770775,
K = 0.03117402, P = 0.00256124, O = 0.45967706, Ba = 0.00128169,
Sr = 0.00095118, Zr = 0.0004889, Ce = 0.0001817, F = 0.00013441,
Cr = 0.0001311, Nb = 9.66e-05, Zn = 7.195e-05, S = 6.746e-05,
Rb = 6.407e-05, U = 0.00120988)
name.majors <- names(true)[1:10]
name.traces <- names(true)[12:21]
# Logit and BLR Mean Function
logit <- function(x){log((x)/(1-x))}
logitInv <- function(x){1/(1+exp(-x))}
# BLR Mean function
blr.mean <- function(x){
# Map a vector from [0,1] to the real space. Takes the mean and return as its output the logit-inverse mean.
logit.t <- sapply(x,logit)
m <- mean(logit.t,na.rm=T)
result <- (exp(m))/( 1 + exp(m))
return(result)
}
blr.min <- function(mu){
D <- length(mu)
Dmat <- 2 * diag(D)
dvec <- 2 * mu
Amat <- cbind(rep(1, D), diag(D))
bvec <- c(1, rep(0, D))
sol <- solve.QP(Dmat, dvec, Amat, bvec, meq = 1)
x <- sol$solution
return(x)
}
opti.blr.m <- function(x) {
blr.m <- apply(x,2,blr.mean)
opti.m <- blr.min(blr.m)
names(opti.m) <- names(blr.m)
return(opti.m)
}
mu <- true[c(c("Si", "Ti", "Al", "Fe", "Mn", "Mg", "Ca", "Na", "K", "P",
"Ba", "Sr", "Zr", "Ce", "F", "Cr", "Nb", "Zn", "S", "Rb", "U"
))] %>% logit() %>% unclass()
# mu <- mu.parameters[[27]]
# Sigma <- variance.R.space[[27]]
GJ.sim <- function(true,Sigma){
mu <- logit(true)[1:21]
sample.inR <- MASS::mvrnorm(n=100,mu = mu,Sigma = Sigma)
sample.inH <- sample.inR %>% as_tibble() %>% apply(2,logitInv) %>% as_tibble()
# Keep only the major elements :
sample.inH.onlymajor <- sample.inH[1:10]
for (i in 1:nrow(sample.inR)){
sample.inH.onlymajor[i,] <- clo(sample.inH.onlymajor[i,])*sum(true[1:10])
}
# Now one should perturb this sample by perturbating each observation
# with a measurement error term. That's each row should deviate from
# the exact proportion (0.535644 %) by some random error term...
# 1 Map each column of the matrix of major elements in the simplex
logit.transf.sample <- logit(sample.inH.onlymajor)
# Let a multiplicative error term which is lognormal distributed with
# mean 0 and standard deviation 0.005.
multiplicative.error.term <- exp(rnorm(100,0,2.5E-2))
# Perturb the major elements
logit.transf.sample.perturbed <- logit.transf.sample*multiplicative.error.term
# Then perform logitINV transf
perturbed.sample.major <- logit.transf.sample.perturbed %>% logitInv()
# Step 2 : Compute the oxygen for each composition (deterministic quantity, not random !)
oxygen.computed <- c()
for (i in 1:nrow(perturbed.sample.major)){
oxygen.computed[i] <- sum(frac_el^{-1}*perturbed.sample.major[i,]-perturbed.sample.major[i,])
}
names(perturbed.sample.major) <- name.majors
perturbed.sample.major$O <- oxygen.computed
# Step 3 draw randomly from traces, now Sigma is 1E-4*diag(abs(mu))
# Keep only the trace elements :
sample.inH.onlytraces <- sample.inH[12:21]
# Close it
for (i in 1:nrow(sample.inR)){
sample.inH.onlytraces[i,] <- clo(sample.inH.onlytraces[i,])*sum(true[12:21])
}
# Perturb the sample with a lognormal error term :
# 1 Map each column of the matrix of major elements in the simplex
logit.transf.sample <- logit(sample.inH.onlytraces)
# Let a multiplicative error term which is lognormal distributed with
# mean 0 and standard deviation 0.005.
multiplicative.error.term <- exp(rnorm(100,0,2.5E-2))
# Perturb the major elements
logit.transf.sample.perturbed <- logit.transf.sample*multiplicative.error.term
# Then perform logitINV transf
perturbed.sample.traces <- logit.transf.sample.perturbed %>% logitInv()
df <- data.frame(perturbed.sample.major,perturbed.sample.traces)
virtual.U <- 1-rowSums(df)
sample.U <- ifelse(virtual.U>=0,virtual.U,NA)
mean.nonnegu <- sample.U %>% log() %>% mean(na.rm=T)
sd.nonnegu <- sample.U %>% log() %>% sd(na.rm=T)
nb.missingval <- length(sample.U[is.na(sample.U)] )
replacement.val <- rnorm(nb.missingval,mean.nonnegu,sd.nonnegu) %>% exp()
df$U <- sample.U
df$U <- replace(df$U,which(is.na(df$U)),replacement.val)
df
#df.w.U <- clo(df)
#df <- clo(df[-22])
return(df)
}
# Applying data acqusition process algorithm to generate 500 datasets :
#list.df <- list()
#m <- 500
#for (i in 1:500){
# list.df[[i]] <- GJ.sim(mu)
# }
#saveRDS(list.df,file="src/generated_geopt48_data.RData" )