layer_wrapper.h (forked from google/lyra)
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LYRA_CODEC_LAYER_WRAPPER_H_
#define LYRA_CODEC_LAYER_WRAPPER_H_

#include <memory>
#include <string>
#include <utility>
#include <variant>
#include <vector>

#include "absl/memory/memory.h"
#include "dsp_util.h"
#include "glog/logging.h"
#include "layer_wrapper_interface.h"
#include "sparse_inference_matrixvector.h"

namespace chromemedia {
namespace codec {

// Forward declarations.
template <typename WeightType, typename RhsType, typename OutputType,
          typename DiskWeightType>
class TransposeConvolutionalLayerWrapper;

template <typename WeightType, typename RhsType, typename OutputType,
          typename DiskWeightType>
class DilatedConvolutionalLayerWrapper;

template <typename WeightType, typename RhsType, typename OutputType,
          typename DiskWeightType>
class Conv1DLayerWrapper;

// Class that wraps the data and logic of sparse linear layers.
template <typename WeightType, typename RhsType, typename OutputType,
typename DiskWeightType>
class LayerWrapper : public LayerWrapperInterface<WeightType, RhsType,
OutputType, DiskWeightType> {
public:
using Input = RhsType;
using Output = OutputType;
// Factory method to decide which subclass to create.
static std::unique_ptr<
LayerWrapper<WeightType, RhsType, OutputType, DiskWeightType>>
Create(const LayerParams& params) {
if (params.type == LayerType::kTranspose) {
return TransposeConvolutionalLayerWrapper<WeightType, RhsType, OutputType,
DiskWeightType>::Create(params);
} else if (params.type == LayerType::kDilated) {
return DilatedConvolutionalLayerWrapper<WeightType, RhsType, OutputType,
DiskWeightType>::Create(params);
} else if (params.type == LayerType::kConv1D) {
return Conv1DLayerWrapper<WeightType, RhsType, OutputType,
DiskWeightType>::Create(params);
} else {
LOG(ERROR) << "Unrecognized type";
return nullptr;
}
}
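
  // Illustrative usage (a sketch, not part of the original file; the concrete
  // template arguments WeightT, RhsT, OutT and DiskWeightT are placeholders
  // chosen by the caller):
  //
  //   LayerParams params;
  //   params.type = LayerType::kDilated;  // Or kTranspose / kConv1D.
  //   auto layer =
  //       LayerWrapper<WeightT, RhsT, OutT, DiskWeightT>::Create(params);
  //   if (layer == nullptr) {
  //     // Creation failed; the reason was logged above.
  //   }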

  // Convenience method used by all subclass creation methods.
static std::unique_ptr<csrblocksparse::SparseLinearLayer<WeightType, RhsType>>
LoadAndCheckLayer(
const std::variant<LayerParams::FromDisk, LayerParams::FromConstant> from,
const std::string& prefix, const std::string& layer_prompt,
int expected_rows, int expected_cols, int num_threads) {
auto layer = absl::make_unique<
csrblocksparse::SparseLinearLayer<WeightType, RhsType>>();
if (std::holds_alternative<LayerParams::FromDisk>(from)) {
const auto from_disk = std::get<LayerParams::FromDisk>(from);
auto LoadLayer =
csrblocksparse::LoadSparseLayer<WeightType, RhsType, DiskWeightType>;
if (!LoadLayer(prefix, from_disk.zipped, layer.get(), from_disk.path)
.ok()) {
LOG(ERROR) << layer_prompt << " loading failed.";
return nullptr;
}
} else {
const auto from_constant = std::get<LayerParams::FromConstant>(from);
*layer = csrblocksparse::CreateConstantLayer<WeightType, RhsType>(
expected_rows, expected_cols, from_constant.sparsity,
from_constant.value);
}
LOG(INFO) << layer_prompt << " Shape: [" << layer->rows() << ", "
<< layer->cols() << "]."
<< " Sparsity: " << layer->sparsity();
// Dimension checks for the loaded layer.
if ((expected_rows > 0 && layer->rows() != expected_rows) ||
(expected_cols > 0 && layer->cols() != expected_cols)) {
LOG(ERROR) << layer_prompt << "Incompatible layer shape: expecting "
<< "[ " << expected_rows << ", " << expected_cols << "], "
<< "but is [" << layer->rows() << ", " << layer->cols()
<< "].";
return nullptr;
}
if (layer->PrepareForThreads(num_threads) != num_threads) {
LOG(ERROR) << layer_prompt << "Could not prepare for " << num_threads
<< " threads.";
return nullptr;
}
return layer;
}
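
  // Typical call site from a subclass factory (a sketch; the LayerParams
  // field names |from|, |prefix| and |num_threads| are assumptions about the
  // definition in layer_wrapper_interface.h):
  //
  //   auto layer =
  //       LoadAndCheckLayer(params.from, params.prefix, "Conv1D: ",
  //                         expected_rows, expected_cols, params.num_threads);
  //   if (layer == nullptr) return nullptr;
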
virtual ~LayerWrapper() {}

  // Runs the layer as a matrix multiplication and a bias-add, optionally
  // followed by a ReLU, depending on |relu_|.
virtual void Run(
int tid, csrblocksparse::SpinBarrier* spin_barrier,
csrblocksparse::MutableVectorView<OutputType> output_view) = 0;
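
  // Illustrative threaded use (a sketch; |num_threads|, |output_view| and the
  // per-thread dispatch are whatever the caller sets up):
  //
  //   csrblocksparse::SpinBarrier barrier(num_threads);
  //   // On each worker thread with id |tid| in [0, num_threads):
  //   layer->Run(tid, &barrier, output_view);
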
// The part of |input_buffer_| updated by the previous layer.
virtual csrblocksparse::MutableVectorView<RhsType> InputViewToUpdate() = 0;
virtual int PrepareForThreads(int num_threads) {
return layer_->PrepareForThreads(num_threads);
}
virtual int bytes() { return layer_->bytes(); }
virtual int rows() { return layer_->rows(); }
virtual int cols() { return layer_->cols(); }
protected:
LayerWrapper() = delete;
explicit LayerWrapper(
int num_input_channels, int output_rows, int length,
int input_buffer_rows, int input_buffer_cols, bool relu,
bool per_column_barrier,
std::unique_ptr<csrblocksparse::SparseLinearLayer<WeightType, RhsType>>
layer)
: num_input_channels_(num_input_channels),
output_rows_(output_rows),
length_(length),
input_buffer_rows_(input_buffer_rows),
input_buffer_cols_(input_buffer_cols),
relu_(relu),
per_column_barrier_(per_column_barrier),
layer_(std::move(layer)),
input_buffer_(input_buffer_rows_, input_buffer_cols_) {
input_buffer_.FillZero();
}
// Perform necessary memory shifting after each Run().
virtual void Reset(int tid, csrblocksparse::SpinBarrier* spin_barrier) = 0;
// Dimensions of matrices participating in y = Wx + b,
// y: |output_rows_| rows and |length_| columns.
// W: |output_rows_| rows and |input_buffer_rows_| columns.
// x: |input_buffer_rows_| rows and |length_| columns.
// b: |output_rows_| rows broadcasted to |length_| columns.
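  //
  // For example (illustrative numbers only): with |output_rows_| = 4,
  // |input_buffer_rows_| = 6 and |length_| = 2, W is a [4 x 6] matrix, x is
  // [6 x 2], b is a [4 x 1] vector broadcast across the 2 columns, and the
  // result y is [4 x 2].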
//
// Number of input channels. This is also the number of rows (out of
// |input_buffer_rows_| rows) in |input_buffer_| updated by the previous
// layer.
const int num_input_channels_;
// Number of output filters. This is the number of rows of the result of
// the matrix multiplication.
const int output_rows_;
// Number of columns (out of |input_buffer_cols_|) participating in the
// matrix multiplication.
const int length_;
  // Number of rows of the input matrix of the multiplication. Equal to
  // |num_input_channels_| * kernel size (the kernel size itself is not
  // stored).
const int input_buffer_rows_;
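  // (Illustrative: with 8 input channels and a kernel size of 3, this would
  // be 8 * 3 = 24.)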
  // Number of columns of the input buffer. Because a layer may use past
  // results as inputs (e.g. a dilated causal convolutional layer), the buffer
  // needs to store more columns than the |length_| columns that actually
  // participate in the matrix multiplication.
const int input_buffer_cols_;
// Whether to perform Relu after the matrix multiplication.
const bool relu_;
// Whether to synchronize among threads after each column of matrix
// multiplication is done.
const bool per_column_barrier_;
std::unique_ptr<csrblocksparse::SparseLinearLayer<WeightType, RhsType>>
layer_;
csrblocksparse::FatCacheAlignedVector<RhsType> input_buffer_;
template <typename WeightTypeKindPeer,
template <typename, typename, typename, typename>
class LayerWrapperTypeTemplate>
friend class LayerWrapperPeer;
};

}  // namespace codec
}  // namespace chromemedia

#endif  // LYRA_CODEC_LAYER_WRAPPER_H_