-
Notifications
You must be signed in to change notification settings - Fork 0
/
weibull.py
181 lines (156 loc) · 7.87 KB
/
weibull.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import numpy as np
import torch
import os, sys
#from pynvml import *
class weibull:
def __init__(self, saved_model=None):
if saved_model:
self.wbFits = torch.zeros(saved_model['Scale'].shape[0],2)
self.wbFits[:, 1] = saved_model['Scale']
self.wbFits[:, 0] = saved_model['Shape']
self.sign = saved_model['signTensor']
self._ = saved_model['translateAmountTensor']
self.smallScoreTensor = saved_model['smallScoreTensor']
return
def return_all_parameters(self):
return dict(Scale = self.wbFits[:, 1],
Shape = self.wbFits[:, 0],
signTensor = self.sign,
translateAmountTensor = 1,
smallScoreTensor = self.smallScoreTensor)
def FitLow(self,data, tailSize, isSorted=False):
"""
data --> 5000 weibulls on 0 dim
--> 10000 distances for each weibull on 1 dim
"""
self.sign = -1
self._determine_splits(data, tailSize, isSorted)
if self.splits == 1:
return self._weibullFitting(data, tailSize, isSorted)
else:
return self._weibullFilltingInBatches(data, tailSize, isSorted)
def FitHigh(self, data, tailSize, isSorted=False):
self.sign = 1
self._determine_splits(data, tailSize, isSorted)
if self.splits == 1:
return self._weibullFitting(data, tailSize, isSorted)
else:
return self._weibullFilltingInBatches(data, tailSize, isSorted)
def wscore(self, distances):
"""
This function can calculate scores from various weibulls for a given set of distances
:param distances: a 2-D tensor with the number of rows equal to number of samples and number of columns equal to number of weibulls
Or
a 1-D tensor with number of elements equal to number of test samples
:return:
"""
self.deviceName = distances.device
scale_tensor = self.wbFits[:,1]
shape_tensor = self.wbFits[:, 0]
if self.sign == -1:
distances = -distances
if len(distances.shape)==1:
distances = distances.repeat(shape_tensor.shape[0],1)
smallScoreTensor=self.smallScoreTensor
if len(self.smallScoreTensor.shape)==2:
smallScoreTensor=self.smallScoreTensor[:,0]
distances = distances + 1 - smallScoreTensor.to(self.deviceName)[None,:]
weibulls = torch.distributions.weibull.Weibull(scale_tensor.to(self.deviceName),shape_tensor.to(self.deviceName), validate_args=False)
distances = distances.clamp(min=0)
return weibulls.cdf(distances)
def _weibullFitting(self, dataTensor, tailSize, isSorted=False):
self.deviceName = dataTensor.device
if isSorted:
sortedTensor = dataTensor
else:
if self.sign == -1:
dataTensor = -dataTensor
sortedTensor = torch.topk(dataTensor, tailSize, dim=1, largest=True, sorted=True).values
smallScoreTensor = sortedTensor[:, tailSize - 1].unsqueeze(1)
processedTensor = sortedTensor + 1 - smallScoreTensor
# Returned in the format [Shape,Scale]
if self.splits == 1:
self.wbFits = self._fit(processedTensor)
self.smallScoreTensor = smallScoreTensor
else:
return self._fit(processedTensor),smallScoreTensor
def _weibullFilltingInBatches(self, dataTensor, tailSize, isSorted = False, gpu=0):
N = dataTensor.shape[0]
dtype = dataTensor.dtype
batchSize = int(np.ceil(N / self.splits))
resultTensor = torch.zeros(size=(N,2), dtype=dtype)
reultTensor_smallScoreTensor = torch.zeros(size=(N,1), dtype=dtype)
for batchIter in range(int(self.splits-1)):
startIndex = batchIter*batchSize
endIndex = startIndex + batchSize
data_batch = dataTensor[startIndex:endIndex,:].cpu()#cuda(gpu)
result_batch, result_batch_smallScoreTensor = self._weibullFitting(data_batch, tailSize, isSorted)
resultTensor[startIndex:endIndex,:] = result_batch.cpu()
reultTensor_smallScoreTensor[startIndex:endIndex,:] = result_batch_smallScoreTensor.cpu()
# process the left-over
startIndex = (self.splits-1)*batchSize
endIndex = N
data_batch = dataTensor[startIndex:endIndex,:].cpu()#cuda(gpu)
result_batch, result_batch_smallScoreTensor = self._weibullFitting(data_batch, tailSize, isSorted)
resultTensor[startIndex:endIndex,:] = result_batch.cpu()
reultTensor_smallScoreTensor[startIndex:endIndex,:] = result_batch_smallScoreTensor.cpu()
self.wbFits = resultTensor
self.smallScoreTensor = reultTensor_smallScoreTensor
def _fit(self, data, iters=100, eps=1e-6):
"""
Fits multiple 2-parameter Weibull distributions to the given data using maximum-likelihood estimation.
:param data: 2d-tensor of samples. Each value must satisfy x > 0.
:param iters: Maximum number of iterations
:param eps: Stopping criterion. Fit is stopped ff the change within two iterations is smaller than eps.
:return: tensor with first column Shape, and second Scale these can be (NaN, NaN) if a fit is impossible.
Impossible fits may be due to 0-values in data.
"""
k = torch.ones(data.shape[0]).double().to(self.deviceName)
k_t_1 = k.clone()
ln_x = torch.log(data)
computed_params = torch.zeros(data.shape[0],2).double().to(self.deviceName)
not_completed = torch.ones(data.shape[0], dtype=torch.bool).to(self.deviceName)
for t in range(iters):
if torch.all(torch.logical_not(not_completed)):
break
# Partial derivative df/dk
x_k = data ** torch.transpose(k.repeat(data.shape[1],1),0,1)
x_k_ln_x = x_k * ln_x
fg = torch.sum(x_k,dim=1)
del x_k
ff = torch.sum(x_k_ln_x,dim=1)
ff_prime = torch.sum(x_k_ln_x * ln_x,dim=1)
del x_k_ln_x
ff_by_fg = ff/fg
del ff
f = ff_by_fg - torch.mean(ln_x,dim=1) - (1.0 / k)
f_prime = (ff_prime / fg - (ff_by_fg**2)) + (1. / (k * k))
del ff_prime, fg
# Newton-Raphson method k = k - f(k;x)/f'(k;x)
k -= f / f_prime
computed_params[not_completed*torch.isnan(f),:] = torch.tensor([float('nan'),float('nan')]).double().to(self.deviceName)
not_completed[abs(k - k_t_1) < eps] = False
computed_params[torch.logical_not(not_completed),0] = k[torch.logical_not(not_completed)]
lam = torch.mean(data ** torch.transpose(k.repeat(data.shape[1],1),0,1),dim=1) ** (1.0 / k)
# Lambda (scale) can be calculated directly
computed_params[torch.logical_not(not_completed), 1] = lam[torch.logical_not(not_completed)]
k_t_1 = k.clone()
return computed_params # Shape (SC), Scale (FE)
def _determine_splits(self, inputTensor, tailSize, isSorted = 0):
dtype_bytes = 8 # since float64
# split chunks according to available GPU memory
#nvmlInit()
#h = nvmlDeviceGetHandleByIndex(0)
#info = nvmlDeviceGetMemoryInfo(h)
gpu_free_mem = 1000#info.free / (1024 * 1024) # amount of free memeory in MB
#print(gpu_free_mem)
height, width = inputTensor.shape[0], inputTensor.shape[1]
size_estimate = height * width * dtype_bytes * 5
total_mem = (size_estimate) / (1024 * 1024) # amount in MB
#print(total_mem)
if total_mem < (gpu_free_mem * 0.7): #no chunks if GPU mem is enough
split = 1
else:
split = round((total_mem) / (gpu_free_mem * 0.7))
self.splits = split
#print('self.splits = ', self.splits)