-
Notifications
You must be signed in to change notification settings - Fork 1
/
model.py
243 lines (187 loc) · 9.84 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
'''
The copyright in this software is being made available under this Software
Copyright License. This software may be subject to other third party and
contributor rights, including patent rights, and no such rights are
granted under this license.
Copyright (c) 1995 - 2020 Fraunhofer-Gesellschaft zur Förderung der
angewandten Forschung e.V. (Fraunhofer)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted for purpose of testing the functionalities of
this software provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the names of the copyright holders nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, INCLUDING
WITHOUT LIMITATION THE PATENTS OF THE COPYRIGHT HOLDERS AND
CONTRIBUTORS, ARE GRANTED BY THIS SOFTWARE LICENSE. THE
COPYRIGHT HOLDERS AND CONTRIBUTORS PROVIDE NO WARRANTY OF PATENT
NON-INFRINGEMENT WITH RESPECT TO THIS SOFTWARE.
'''
import torch
import torch.nn as nn
class DistortionSensitivityModel(nn.Module):
    """Convolutional estimator of distortion sensitivity for image blocks.

    The network maps a single-channel block to one scalar sensitivity value
    and additionally exposes a learnable, input-independent steepness
    parameter for a sigmoid regression function.
    """

    def __init__(self, block_size, init_steepness, init_sensitivity,
                 distortions, optimize_gamma=False, min_steepness=0.01):
        """Store the configuration and build all layers and parameters.

        Parameters
        ----------
        block_size : int
            Height and width of input blocks. Also influences the neural
            network architecture. (See _build().)
        init_steepness : float
            Initial value of the steepness parameter.
        init_sensitivity : float
            Initial value of the bias in the final layer. Using a good value
            here can speed up training time and stability considerably.
        distortions : list of strings
            Distortion types of input data. One scaling parameter (gamma,
            initialised to 1) is created for every distortion type in the
            list. The gammas are only applied in forward() when
            optimize_gamma=True.
        optimize_gamma : bool
            If true, the model scales the network's estimate of distortion
            sensitivity by the distortion-type specific parameter gamma.
        min_steepness : float
            Lower bound to be enforced on the steepness estimate. This will
            stabilize the initial training phase.
        """
        super().__init__()
        self.block_size = block_size
        self.init_sensitivity = init_sensitivity
        self.init_steepness = init_steepness
        self.min_steepness = min_steepness
        self.distortions = distortions
        self.optimize_gamma = optimize_gamma
        self._build()

    def _build(self):
        """Set up the model parameters and layers.

        The architecture depends on the input size (self.block_size).
        With C<n> a 3x3 convolution with n output channels followed by a
        LeakyReLU(0.2), P a 2x2 max pooling and F<n> a fully connected layer
        with n outputs, the layouts built by this method are:

        block_size == 8:
            C32 C32 C64 C64 C128 C128 P C256 C256 P C512 C512 P F512 F1
        block_size == 16:
            C32 C32 P C64 C64 C128 C128 P C256 C256 P C512 C512 P F512 F1
        block_size == 32:
            C32 C32 P C64 C64 P C128 C128 P C256 C256 P C512 C512 P F512 F1
        block_size == 64:
            C32 C32 P C64 C64 P C128 C128 P C256 P C256 P C512 C512 P F512 F1
        block_size == 128:
            C32 C32 P C64 C64 P C128 C128 P C256 P C256 P C512 P C512 P F512 F1

        For each of these sizes the number of pooling layers reduces the
        spatial extent to 1x1, which is what the 512-input regression head
        expects.

        NOTE(review): an earlier version of this docstring listed the first
        pooling layer for block_size == 16 after the C64 pair; the code
        places it after the C32 pair. The layer order is kept exactly as
        implemented so that the indices inside the nn.Sequential containers
        (and thus state_dict keys) do not change -- confirm against the
        paper before altering it.
        """
        # The input-independent steepness parameter for the sigmoid function.
        self._steepness = nn.Parameter(
            torch.tensor([self.init_steepness], dtype=torch.float32))

        # One scaling parameter per distortion type (str -> Parameter).
        self.gammas = nn.ParameterDict({
            distortion: nn.Parameter(torch.tensor([1.]))
            for distortion in self.distortions
        })

        def conv_block(in_channels, out_channels):
            # 3x3 convolution that preserves the spatial size + LeakyReLU.
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.LeakyReLU(negative_slope=0.2),
            ]

        def pool():
            # 2x2 max pooling halves height and width.
            return nn.MaxPool2d(kernel_size=2, stride=2)

        # Feature extractor. The order of the layers below must not change:
        # it determines the module indices inside the nn.Sequential.
        layers = []
        layers += conv_block(1, 32)
        layers += conv_block(32, 32)
        if self.block_size > 8:
            # no pooling here for 8x8 inputs
            layers.append(pool())
        layers += conv_block(32, 64)
        layers += conv_block(64, 64)
        if self.block_size > 16:
            # no pooling here for 8x8 or 16x16 inputs
            layers.append(pool())
        layers += conv_block(64, 128)
        layers += conv_block(128, 128)
        layers.append(pool())
        layers += conv_block(128, 256)
        if self.block_size > 32:
            # no pooling here for 8x8, 16x16 or 32x32 inputs
            layers.append(pool())
        layers += conv_block(256, 256)
        layers.append(pool())
        layers += conv_block(256, 512)
        if self.block_size > 64:
            # no pooling here for 8x8, 16x16, 32x32 or 64x64 inputs
            layers.append(pool())
        layers += conv_block(512, 512)
        layers.append(pool())
        self._features = nn.Sequential(*layers)

        # Regression head: F512, dropout, F1.
        hidden = nn.Linear(in_features=512, out_features=512, bias=True)
        output = nn.Linear(in_features=512, out_features=1, bias=True)
        # Start the prediction at the given sensitivity value.
        nn.init.constant_(output.bias, self.init_sensitivity)
        self._regression = nn.Sequential(hidden, nn.Dropout(0.5), output)

    def forward(self, x):
        """Apply the model to compute sensitivity.

        Parameters
        ----------
        x : (float tensor, str) tuple
            Tensor represents the input to the model and should have shape
            (batch, channel, height, width). The string specifies the
            distortion type of the data in the batch. All data in the tensor
            must have the same distortion type. The string is only used when
            optimize_gamma=True.

        Returns
        -------
        float tensor
            Estimate of distortion sensitivity per batch element, shape
            (batch, 1).
        float tensor
            Steepness for the sigmoid regression function, clamped to be at
            least min_steepness.

        Raises
        ------
        KeyError
            If optimize_gamma=True and the distortion type was not part of
            the `distortions` list given at construction time.
        """
        x, distortion = x

        # Collapse only (channel, height, width) into one feature axis.
        # A plain squeeze() would also drop the batch axis for batches of
        # size one and change the shape of the result.
        features = torch.flatten(self._features(x), start_dim=-3)
        sensitivity = self._regression(features)

        if self.optimize_gamma:
            # apply the distortion type-dependent scaling factor
            sensitivity = self.gammas[distortion] * sensitivity

        # Early in training quality predictions are very scattered and the
        # optimal steepness is very low, which may destabilize training and
        # corrupt computation of correlations. To avoid this, steepness is
        # enforced to be larger than 'min_steepness' at all times. Note that
        # this usually only affects the initial training phase (perhaps
        # first 5 iterations) after which the optimal steepness (and the
        # estimate) should generally be larger than the enforced lower bound.
        steepness = torch.clamp(self._steepness, min=self.min_steepness)
        return sensitivity, steepness

    def get_gamma_dict(self):
        """Return the distortion type specific scaling factors.

        Returns
        -------
        dict
            Mapping str -> Parameter that represents the distortion type
            specific scaling factors gamma.
        """
        return dict(self.gammas.items())