Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into jiwaszki/fuse_mul_fc_luci_test

Browse files Browse the repository at this point in the history
  • Loading branch information
jiwaszki committed Aug 22, 2024
2 parents 4ea2047 + e871367 commit 2adb456
Show file tree
Hide file tree
Showing 61 changed files with 1,848 additions and 267 deletions.
4 changes: 4 additions & 0 deletions compiler/circle2circle/src/Circle2Circle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ int entry(int argc, char **argv)
"This will fuse Mul operation with a preceding Conv if possible.");
add_switch(arser, "--fuse_mul_with_div",
"This will fuse Mul operation with a Div operation whose numerator is const.");
add_switch(arser, "--fuse_mul_with_fullyconnected",
"This will fuse Mul operator with a preceding FullyConnected operator.");
add_switch(arser, "--fuse_slice_with_tconv",
"This will fuse Slice operation with a preceding TConv if possible.");
add_switch(arser, "--fuse_transpose_with_mean",
Expand Down Expand Up @@ -326,6 +328,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::FuseMulWithConv);
if (arser.get<bool>("--fuse_mul_with_div"))
options->enable(Algorithms::FuseMulWithDiv);
if (arser.get<bool>("--fuse_mul_with_fullyconnected"))
options->enable(Algorithms::FuseMulWithFullyConnected);
if (arser.get<bool>("--make_batchnorm_gamma_positive"))
options->enable(Algorithms::MakeBatchNormGammaPositive);
if (arser.get<bool>("--fuse_preactivation_batchnorm"))
Expand Down
138 changes: 102 additions & 36 deletions compiler/luci-interpreter/src/kernels/InstanceNorm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,35 @@ InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tenso

void InstanceNorm::configure()
{
  // Validates input/gamma/beta/output tensors for rank-4 (NHWC image) and
  // rank-3 (N x C x D) inputs, then shapes the output like the input.
  //
  // FIX(review): the stale unconditional rank-4 shape checks that preceded the
  // rank dispatch made the rank-3 branch unreachable; they are removed and the
  // rank-independent checks are hoisted out of the branches.
  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
  LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
  LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
  // gamma and beta are 1-D: one value per channel, or a single broadcast value.
  LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
  LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);

  if (input()->shape().num_dims() == 4)
  {
    // Image case: channel dimension is dim(3) (N x H x W x C).
    LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
                           gamma()->shape().dim(0) == 1);
    LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
                           beta()->shape().dim(0) == 1);
  }
  else if (input()->shape().num_dims() == 3)
  {
    // Non-image case: channel dimension is dim(1) (N x C x D).
    LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(1) ||
                           gamma()->shape().dim(0) == 1);
    LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(1) ||
                           beta()->shape().dim(0) == 1);
  }
  else
  {
    LUCI_INTERPRETER_CHECK(false && "luci-intp InstanceNorm unsupported rank.");
  }

  output()->resize(input()->shape());
}

Expand All @@ -63,12 +82,9 @@ void InstanceNorm::evalFloat() const
{
float activation_min, activation_max;
calculateActivationRange(params().activation, &activation_min, &activation_max);
auto input_shape = getTensorShape(input());
tflite::RuntimeShape input_shape = getTensorShape(input());
auto output_shape = getTensorShape(output());
const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);

const float *input_data = getTensorData<float>(input());
const float *gamma_data = getTensorData<float>(gamma());
auto gamma_shape = getTensorShape(gamma());
Expand All @@ -77,44 +93,94 @@ void InstanceNorm::evalFloat() const
auto beta_shape = getTensorShape(beta());
bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
float *output_data = getTensorData<float>(output());
for (int32_t batch = 0; batch < batches; batch++)

if (input_shape.DimensionsCount() == 4)
{
for (int32_t channel = 0; channel < channels; channel++)
// Dimensions for image case are (N x H x W x C)
const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
for (int32_t batch = 0; batch < batches; batch++)
{
double sum = 0.0f;
double square_sum = 0.0f;
int32_t size = heights * widths;
for (int32_t height = 0; height < heights; height++)
for (int32_t channel = 0; channel < channels; channel++)
{
for (int32_t width = 0; width < widths; width++)
double sum = 0.0f;
double square_sum = 0.0f;
int32_t size = heights * widths;
for (int32_t height = 0; height < heights; height++)
{
for (int32_t width = 0; width < widths; width++)
{
double input_val =
input_data[tflite::Offset(input_shape, batch, height, width, channel)];
sum += input_val;
square_sum += (input_val * input_val);
}
}
double mean = sum / size;
double var = square_sum / size - mean * mean;

double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
double beta = single_beta ? beta_data[0] : beta_data[channel];
double a = gamma / (std::sqrt(var + params().epsilon));
double b = -mean * a + beta;

for (int32_t height = 0; height < heights; height++)
{
double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
for (int32_t width = 0; width < widths; width++)
{
double input_value =
input_data[tflite::Offset(output_shape, batch, height, width, channel)];
double output_value = input_value * a + b;
output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
activation_max);
}
}
}
}
}
else if (input_shape.DimensionsCount() == 3)
{
// Dimensions for non image case are (N x C x D1 x D2 … Dn)
const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
const int32_t channels = tflite::MatchingDim(input_shape, 1, output_shape, 1);
const int32_t size = tflite::MatchingDim(input_shape, 2, output_shape, 2);
for (int32_t batch = 0; batch < batches; batch++)
{
for (int32_t channel = 0; channel < channels; channel++)
{
double sum = 0.0f;
double square_sum = 0.0f;
size_t offset =
static_cast<size_t>(batch * channels * size) + static_cast<size_t>(channel * size);
for (int32_t i = 0; i < size; i++)
{
double input_val = input_data[offset + i];
sum += input_val;
square_sum += (input_val * input_val);
}
}
double mean = sum / size;
double var = square_sum / size - mean * mean;
double mean = sum / size;
double var = square_sum / size - mean * mean;

double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
double beta = single_beta ? beta_data[0] : beta_data[channel];
double a = gamma / (std::sqrt(var + params().epsilon));
double b = -mean * a + beta;
double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
double beta = single_beta ? beta_data[0] : beta_data[channel];
double a = gamma / (std::sqrt(var + params().epsilon));
double b = -mean * a + beta;

for (int32_t height = 0; height < heights; height++)
{
for (int32_t width = 0; width < widths; width++)
for (int32_t i = 0; i < size; i++)
{
double input_value =
input_data[tflite::Offset(output_shape, batch, height, width, channel)];
double input_value = input_data[offset + i];
double output_value = input_value * a + b;
output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
activation_max);
output_data[offset + i] = tflite::ActivationFunctionWithMinMax(
(float)output_value, activation_min, activation_max);
}
}
}
}
else
throw std::runtime_error("luci-intp InstanceNorm unsupported rank.");
}

} // namespace kernels
Expand Down
53 changes: 53 additions & 0 deletions compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,27 @@ TEST_F(InstanceNormTest, Single_gamma_beta)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2}));
}

TEST_F(InstanceNormTest, Single_gamma_beta_3D)
{
  // Rank-3 input of all ones with broadcast gamma={1}, beta={2}: the
  // normalized values are zero, so every output element equals beta (2).
  Tensor input =
    makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 1, 1, 1}, _memory_manager.get());
  Tensor scale = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
  Tensor shift = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
  Tensor result = makeOutputTensor(DataType::FLOAT32);

  InstanceNormParams norm_params{};
  norm_params.epsilon = 0.1f;
  norm_params.activation = Activation::NONE;

  InstanceNorm norm_kernel(&input, &scale, &shift, &result, norm_params);
  norm_kernel.configure();
  _memory_manager->allocate_memory(result);
  norm_kernel.execute();

  EXPECT_THAT(extractTensorData<float>(result), FloatArrayNear({2, 2, 2, 2}));
  EXPECT_THAT(extractTensorShape(result), ::testing::ElementsAreArray({1, 2, 2}));
}

TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
{
Tensor input_tensor =
Expand All @@ -92,6 +113,38 @@ TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
EXPECT_ANY_THROW(kernel.configure());
}

TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_3D_NEG)
{
  // gamma/beta carry 3 values, but the rank-3 input has 2 channels (dim 1)
  // and 3 != 1, so configure() must reject these shapes.
  Tensor input =
    makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 1, 1, 1}, _memory_manager.get());
  Tensor scale = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get());
  Tensor shift = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get());
  Tensor result = makeOutputTensor(DataType::FLOAT32);

  InstanceNormParams norm_params{};
  norm_params.epsilon = 0.1f;
  norm_params.activation = Activation::NONE;

  InstanceNorm norm_kernel(&input, &scale, &shift, &result, norm_params);
  EXPECT_ANY_THROW(norm_kernel.configure());
}

TEST_F(InstanceNormTest, Unsupported_dims_NEG)
{
  // Rank-2 input is neither the rank-4 nor the rank-3 supported case, so
  // configure() must throw.
  Tensor input =
    makeInputTensor<DataType::FLOAT32>({2, 2}, {1, 1, 1, 1}, _memory_manager.get());
  Tensor scale = makeInputTensor<DataType::FLOAT32>({2}, {1, 1}, _memory_manager.get());
  Tensor shift = makeInputTensor<DataType::FLOAT32>({2}, {2, 2}, _memory_manager.get());
  Tensor result = makeOutputTensor(DataType::FLOAT32);

  InstanceNormParams norm_params{};
  norm_params.epsilon = 0.1f;
  norm_params.activation = Activation::NONE;

  InstanceNorm norm_kernel(&input, &scale, &shift, &result, norm_params);
  EXPECT_ANY_THROW(norm_kernel.configure());
}

} // namespace
} // namespace kernels
} // namespace luci_interpreter
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class Algorithm final : public luci::CircleNodeVisitor<loco::TensorShape>
// loco::TensorShape visit(const luci::CircleBatchToSpaceND *node) final;
// loco::TensorShape visit(const luci::CircleCast *node) final;
// loco::TensorShape visit(const luci::CircleCeil *node) final;
// loco::TensorShape visit(const luci::CircleConcatenation *node) final;
loco::TensorShape visit(const luci::CircleConcatenation *node) final;
// loco::TensorShape visit(const luci::CircleConst *node) final;
// loco::TensorShape visit(const luci::CircleConv2D *node) final;
// loco::TensorShape visit(const luci::CircleCos *node) final;
Expand Down
69 changes: 0 additions & 69 deletions compiler/luci/service/src/CircleShapeInferenceRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,70 +539,6 @@ loco::NodeShape infer_broadcast_to(const luci::CircleBroadcastTo *node)
return loco::NodeShape{shape_by_input};
}

loco::NodeShape infer_concatenation(const luci::CircleConcatenation *node)
{
  // Infers the output shape of Concatenation: the concat-axis dimension is
  // the sum of the inputs' axis dimensions; all other dimensions must agree.
  // TODO Support when CircleConcatenation has 0 input
  assert(node->numValues() > 0);

  const auto base_shape = luci::shape_get(node->values(0)).as<loco::TensorShape>();
  auto concat_axis = node->axis();
  // A negative axis counts from the back.
  if (concat_axis < 0)
    concat_axis += base_shape.rank();

  assert(0 <= concat_axis);
  assert(base_shape.rank() > static_cast<uint32_t>(concat_axis));

  // Seed the result with the first input's shape.
  loco::TensorShape result;
  result.rank(base_shape.rank());
  for (uint32_t d = 0; d < result.rank(); ++d)
    result.dim(d) = base_shape.dim(d);

  // Fold in the remaining inputs.
  for (uint32_t n = 1; n < node->numValues(); ++n)
  {
    const auto curr_shape = luci::shape_get(node->values(n)).as<loco::TensorShape>();
    if (curr_shape.rank() != result.rank())
      INTERNAL_EXN_V("Input has incompatible shape", node->name());

    for (uint32_t d = 0; d < result.rank(); ++d)
    {
      const bool both_known = result.dim(d).known() && curr_shape.dim(d).known();
      if (d == static_cast<uint32_t>(concat_axis))
      {
        if (both_known)
        {
          result.dim(d) = result.dim(d).value() + curr_shape.dim(d).value();
        }
        else
        {
          // If any of inputs is unknown, just mark it as unknown.
          result.dim(d).unset();
        }
      }
      else
      {
        if (both_known)
        {
          if (result.dim(d).value() != curr_shape.dim(d).value())
          {
            INTERNAL_EXN_V("Input has incompatible shape.", node->name());
          }
        }
        else if (curr_shape.dim(d).known())
        {
          assert(not result.dim(d).known()); // FIX_ME_UNLESS
          result.dim(d) = curr_shape.dim(d);
        }
        // For unknown input dims, leave the result dimension as-is.
      }
    }
  }

  return loco::NodeShape{result};
}

loco::NodeShape infer_conv2d(const luci::CircleConv2D *node)
{
LOGGER(l);
Expand Down Expand Up @@ -2118,11 +2054,6 @@ class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeS

loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }

loco::NodeShape visit(const luci::CircleConcatenation *node) final
{
  // Delegates to infer_concatenation: sums the concat-axis dimension across
  // inputs and validates that all other dimensions agree.
  return infer_concatenation(node);
}

loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }

loco::NodeShape visit(const luci::CircleConv2D *node) final { return infer_conv2d(node); }
Expand Down
Loading

0 comments on commit 2adb456

Please sign in to comment.