Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into jiwaszki/fuse_mul_fc_luci_test

Browse files Browse the repository at this point in the history
  • Loading branch information
jiwaszki committed Aug 22, 2024
2 parents 4ea2047 + e871367 commit 2adb456
Show file tree
Hide file tree
Showing 61 changed files with 1,848 additions and 267 deletions.
4 changes: 4 additions & 0 deletions compiler/circle2circle/src/Circle2Circle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ int entry(int argc, char **argv)
"This will fuse Mul operation with a preceding Conv if possible.");
add_switch(arser, "--fuse_mul_with_div",
"This will fuse Mul operation with a Div operation whose numerator is const.");
add_switch(arser, "--fuse_mul_with_fullyconnected",
"This will fuse Mul operator with a preceding FullyConnected operator.");
add_switch(arser, "--fuse_slice_with_tconv",
"This will fuse Slice operation with a preceding TConv if possible.");
add_switch(arser, "--fuse_transpose_with_mean",
Expand Down Expand Up @@ -326,6 +328,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::FuseMulWithConv);
if (arser.get<bool>("--fuse_mul_with_div"))
options->enable(Algorithms::FuseMulWithDiv);
if (arser.get<bool>("--fuse_mul_with_fullyconnected"))
options->enable(Algorithms::FuseMulWithFullyConnected);
if (arser.get<bool>("--make_batchnorm_gamma_positive"))
options->enable(Algorithms::MakeBatchNormGammaPositive);
if (arser.get<bool>("--fuse_preactivation_batchnorm"))
Expand Down
138 changes: 102 additions & 36 deletions compiler/luci-interpreter/src/kernels/InstanceNorm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,35 @@ InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tenso

void InstanceNorm::configure()
{
  // Validates input/gamma/beta/output tensors for rank-4 (NHWC image) and
  // rank-3 (N x C x D) inputs, then shapes the output like the input.
  //
  // FIX(review): the stale unconditional rank-4 shape checks that preceded the
  // rank dispatch made the rank-3 branch unreachable; they are removed and the
  // rank-independent checks are hoisted out of the branches.
  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
  LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
  LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
  // gamma and beta are 1-D: one value per channel, or a single broadcast value.
  LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
  LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);

  if (input()->shape().num_dims() == 4)
  {
    // Image case: channel dimension is dim(3) (N x H x W x C).
    LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
                           gamma()->shape().dim(0) == 1);
    LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
                           beta()->shape().dim(0) == 1);
  }
  else if (input()->shape().num_dims() == 3)
  {
    // Non-image case: channel dimension is dim(1) (N x C x D).
    LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(1) ||
                           gamma()->shape().dim(0) == 1);
    LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(1) ||
                           beta()->shape().dim(0) == 1);
  }
  else
  {
    LUCI_INTERPRETER_CHECK(false && "luci-intp InstanceNorm unsupported rank.");
  }

  output()->resize(input()->shape());
}

Expand All @@ -63,12 +82,9 @@ void InstanceNorm::evalFloat() const
{
float activation_min, activation_max;
calculateActivationRange(params().activation, &activation_min, &activation_max);
auto input_shape = getTensorShape(input());
tflite::RuntimeShape input_shape = getTensorShape(input());
auto output_shape = getTensorShape(output());
const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);

const float *input_data = getTensorData<float>(input());
const float *gamma_data = getTensorData<float>(gamma());
auto gamma_shape = getTensorShape(gamma());
Expand All @@ -77,44 +93,94 @@ void InstanceNorm::evalFloat() const
auto beta_shape = getTensorShape(beta());
bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
float *output_data = getTensorData<float>(output());
for (int32_t batch = 0; batch < batches; batch++)

if (input_shape.DimensionsCount() == 4)
{
for (int32_t channel = 0; channel < channels; channel++)
// Dimensions for image case are (N x H x W x C)
const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
for (int32_t batch = 0; batch < batches; batch++)
{
double sum = 0.0f;
double square_sum = 0.0f;
int32_t size = heights * widths;
for (int32_t height = 0; height < heights; height++)
for (int32_t channel = 0; channel < channels; channel++)
{
for (int32_t width = 0; width < widths; width++)
double sum = 0.0f;
double square_sum = 0.0f;
int32_t size = heights * widths;
for (int32_t height = 0; height < heights; height++)
{
for (int32_t width = 0; width < widths; width++)
{
double input_val =
input_data[tflite::Offset(input_shape, batch, height, width, channel)];
sum += input_val;
square_sum += (input_val * input_val);
}
}
double mean = sum / size;
double var = square_sum / size - mean * mean;

double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
double beta = single_beta ? beta_data[0] : beta_data[channel];
double a = gamma / (std::sqrt(var + params().epsilon));
double b = -mean * a + beta;

for (int32_t height = 0; height < heights; height++)
{
double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
for (int32_t width = 0; width < widths; width++)
{
double input_value =
input_data[tflite::Offset(output_shape, batch, height, width, channel)];
double output_value = input_value * a + b;
output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
activation_max);
}
}
}
}
}
else if (input_shape.DimensionsCount() == 3)
{
// Dimensions for non image case are (N x C x D1 x D2 … Dn)
const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
const int32_t channels = tflite::MatchingDim(input_shape, 1, output_shape, 1);
const int32_t size = tflite::MatchingDim(input_shape, 2, output_shape, 2);
for (int32_t batch = 0; batch < batches; batch++)
{
for (int32_t channel = 0; channel < channels; channel++)
{
double sum = 0.0f;
double square_sum = 0.0f;
size_t offset =
static_cast<size_t>(batch * channels * size) + static_cast<size_t>(channel * size);
for (int32_t i = 0; i < size; i++)
{
double input_val = input_data[offset + i];
sum += input_val;
square_sum += (input_val * input_val);
}
}
double mean = sum / size;
double var = square_sum / size - mean * mean;
double mean = sum / size;
double var = square_sum / size - mean * mean;

double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
double beta = single_beta ? beta_data[0] : beta_data[channel];
double a = gamma / (std::sqrt(var + params().epsilon));
double b = -mean * a + beta;
double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
double beta = single_beta ? beta_data[0] : beta_data[channel];
double a = gamma / (std::sqrt(var + params().epsilon));
double b = -mean * a + beta;

for (int32_t height = 0; height < heights; height++)
{
for (int32_t width = 0; width < widths; width++)
for (int32_t i = 0; i < size; i++)
{
double input_value =
input_data[tflite::Offset(output_shape, batch, height, width, channel)];
double input_value = input_data[offset + i];
double output_value = input_value * a + b;
output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
activation_max);
output_data[offset + i] = tflite::ActivationFunctionWithMinMax(
(float)output_value, activation_min, activation_max);
}
}
}
}
else
throw std::runtime_error("luci-intp InstanceNorm unsupported rank.");
}

} // namespace kernels
Expand Down
53 changes: 53 additions & 0 deletions compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,27 @@ TEST_F(InstanceNormTest, Single_gamma_beta)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2}));
}

TEST_F(InstanceNormTest, Single_gamma_beta_3D)
{
  // Rank-3 input of all ones with broadcast gamma={1}, beta={2}: the
  // normalized values are zero, so every output element equals beta (2).
  Tensor input =
    makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 1, 1, 1}, _memory_manager.get());
  Tensor scale = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
  Tensor shift = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
  Tensor result = makeOutputTensor(DataType::FLOAT32);

  InstanceNormParams norm_params{};
  norm_params.epsilon = 0.1f;
  norm_params.activation = Activation::NONE;

  InstanceNorm norm_kernel(&input, &scale, &shift, &result, norm_params);
  norm_kernel.configure();
  _memory_manager->allocate_memory(result);
  norm_kernel.execute();

  EXPECT_THAT(extractTensorData<float>(result), FloatArrayNear({2, 2, 2, 2}));
  EXPECT_THAT(extractTensorShape(result), ::testing::ElementsAreArray({1, 2, 2}));
}

TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
{
Tensor input_tensor =
Expand All @@ -92,6 +113,38 @@ TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
EXPECT_ANY_THROW(kernel.configure());
}

TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_3D_NEG)
{
  // gamma/beta carry 3 values, but the rank-3 input has 2 channels (dim 1)
  // and 3 != 1, so configure() must reject these shapes.
  Tensor input =
    makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 1, 1, 1}, _memory_manager.get());
  Tensor scale = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get());
  Tensor shift = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get());
  Tensor result = makeOutputTensor(DataType::FLOAT32);

  InstanceNormParams norm_params{};
  norm_params.epsilon = 0.1f;
  norm_params.activation = Activation::NONE;

  InstanceNorm norm_kernel(&input, &scale, &shift, &result, norm_params);
  EXPECT_ANY_THROW(norm_kernel.configure());
}

TEST_F(InstanceNormTest, Unsupported_dims_NEG)
{
  // Rank-2 input is neither the rank-4 nor the rank-3 supported case, so
  // configure() must throw.
  Tensor input =
    makeInputTensor<DataType::FLOAT32>({2, 2}, {1, 1, 1, 1}, _memory_manager.get());
  Tensor scale = makeInputTensor<DataType::FLOAT32>({2}, {1, 1}, _memory_manager.get());
  Tensor shift = makeInputTensor<DataType::FLOAT32>({2}, {2, 2}, _memory_manager.get());
  Tensor result = makeOutputTensor(DataType::FLOAT32);

  InstanceNormParams norm_params{};
  norm_params.epsilon = 0.1f;
  norm_params.activation = Activation::NONE;

  InstanceNorm norm_kernel(&input, &scale, &shift, &result, norm_params);
  EXPECT_ANY_THROW(norm_kernel.configure());
}

} // namespace
} // namespace kernels
} // namespace luci_interpreter
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class Algorithm final : public luci::CircleNodeVisitor<loco::TensorShape>
// loco::TensorShape visit(const luci::CircleBatchToSpaceND *node) final;
// loco::TensorShape visit(const luci::CircleCast *node) final;
// loco::TensorShape visit(const luci::CircleCeil *node) final;
// loco::TensorShape visit(const luci::CircleConcatenation *node) final;
loco::TensorShape visit(const luci::CircleConcatenation *node) final;
// loco::TensorShape visit(const luci::CircleConst *node) final;
// loco::TensorShape visit(const luci::CircleConv2D *node) final;
// loco::TensorShape visit(const luci::CircleCos *node) final;
Expand Down
69 changes: 0 additions & 69 deletions compiler/luci/service/src/CircleShapeInferenceRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,70 +539,6 @@ loco::NodeShape infer_broadcast_to(const luci::CircleBroadcastTo *node)
return loco::NodeShape{shape_by_input};
}

loco::NodeShape infer_concatenation(const luci::CircleConcatenation *node)
{
  // Infers the output shape of Concatenation: the concat-axis dimension is
  // the sum of the inputs' axis dimensions; all other dimensions must agree.
  // TODO Support when CircleConcatenation has 0 input
  assert(node->numValues() > 0);

  const auto base_shape = luci::shape_get(node->values(0)).as<loco::TensorShape>();
  auto concat_axis = node->axis();
  // A negative axis counts from the back.
  if (concat_axis < 0)
    concat_axis += base_shape.rank();

  assert(0 <= concat_axis);
  assert(base_shape.rank() > static_cast<uint32_t>(concat_axis));

  // Seed the result with the first input's shape.
  loco::TensorShape result;
  result.rank(base_shape.rank());
  for (uint32_t d = 0; d < result.rank(); ++d)
    result.dim(d) = base_shape.dim(d);

  // Fold in the remaining inputs.
  for (uint32_t n = 1; n < node->numValues(); ++n)
  {
    const auto curr_shape = luci::shape_get(node->values(n)).as<loco::TensorShape>();
    if (curr_shape.rank() != result.rank())
      INTERNAL_EXN_V("Input has incompatible shape", node->name());

    for (uint32_t d = 0; d < result.rank(); ++d)
    {
      const bool both_known = result.dim(d).known() && curr_shape.dim(d).known();
      if (d == static_cast<uint32_t>(concat_axis))
      {
        if (both_known)
        {
          result.dim(d) = result.dim(d).value() + curr_shape.dim(d).value();
        }
        else
        {
          // If any of inputs is unknown, just mark it as unknown.
          result.dim(d).unset();
        }
      }
      else
      {
        if (both_known)
        {
          if (result.dim(d).value() != curr_shape.dim(d).value())
          {
            INTERNAL_EXN_V("Input has incompatible shape.", node->name());
          }
        }
        else if (curr_shape.dim(d).known())
        {
          assert(not result.dim(d).known()); // FIX_ME_UNLESS
          result.dim(d) = curr_shape.dim(d);
        }
        // For unknown input dims, leave the result dimension as-is.
      }
    }
  }

  return loco::NodeShape{result};
}

loco::NodeShape infer_conv2d(const luci::CircleConv2D *node)
{
LOGGER(l);
Expand Down Expand Up @@ -2118,11 +2054,6 @@ class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeS

loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }

loco::NodeShape visit(const luci::CircleConcatenation *node) final
{
  // Delegates to infer_concatenation: sums the concat-axis dimension across
  // inputs and validates that all other dimensions agree.
  return infer_concatenation(node);
}

loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }

loco::NodeShape visit(const luci::CircleConv2D *node) final { return infer_conv2d(node); }
Expand Down
Loading

0 comments on commit 2adb456

Please sign in to comment.