[Feature] Add support for the stack_group_points op on Ascend (#3023)
673958639 authored Jan 29, 2024
1 parent c7c02a7 commit 815f5d7
Showing 2 changed files with 38 additions and 3 deletions.
25 changes: 25 additions & 0 deletions mmcv/ops/csrc/pytorch/npu/stack_group_points_npu.cpp
@@ -0,0 +1,25 @@
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

// Forward the stack-mode group-points call to the Ascend aclnn kernel.
void stack_group_points_forward_npu(int b, int c, int n, int nsample,
                                    const Tensor features_tensor,
                                    const Tensor features_batch_cnt_tensor,
                                    const Tensor idx_tensor,
                                    const Tensor idx_batch_cnt_tensor,
                                    Tensor out_tensor) {
  EXEC_NPU_CMD(aclnnStackGroupPoints, features_tensor,
               features_batch_cnt_tensor, idx_tensor, idx_batch_cnt_tensor,
               out_tensor);
}

// Device-agnostic dispatch entry point declared by mmcv's op registry.
void stack_group_points_forward_impl(int b, int c, int n, int nsample,
                                     const Tensor features_tensor,
                                     const Tensor features_batch_cnt_tensor,
                                     const Tensor idx_tensor,
                                     const Tensor idx_batch_cnt_tensor,
                                     Tensor out_tensor);

// Route the dispatcher to the NPU implementation above.
REGISTER_NPU_IMPL(stack_group_points_forward_impl,
                  stack_group_points_forward_npu);
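
For orientation, here is a minimal sketch of how this kernel is reached from Python. It assumes mmcv's grouping_operation wrapper (which switches to stack mode when the batch-count tensors are passed) and a working torch_npu install; the tensor shapes are illustrative and are not taken from this commit:

import torch
import torch_npu  # noqa: F401  (assumed installed; enables the 'npu' device)

from mmcv.ops import grouping_operation

# Two point clouds stacked along dim 0: 10 points each, 4 channels.
features = torch.rand(20, 4).npu()
features_batch_cnt = torch.tensor([10, 10], dtype=torch.int32).npu()

# Three query points per sample, each gathering nsample=3 neighbors.
idx = torch.randint(0, 10, (6, 3), dtype=torch.int32).npu()
idx_batch_cnt = torch.tensor([3, 3], dtype=torch.int32).npu()

# Passing the batch-count tensors selects stack mode, which dispatches to
# stack_group_points_forward_impl and, via the registration above, to the
# aclnn kernel on Ascend devices.
out = grouping_operation(features, idx, features_batch_cnt, idx_batch_cnt)
print(out.shape)  # expected: (6, 4, 3), i.e. (M, C, nsample)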
16 changes: 13 additions & 3 deletions tests/test_ops/test_group_points.py
@@ -72,10 +72,20 @@ def test_grouping_points(dtype, device):
     assert torch.allclose(output, expected_output)
 
 
-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
+@pytest.mark.parametrize('device', [
+    pytest.param(
+        'cuda',
+        marks=pytest.mark.skipif(
+            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
+])
 @pytest.mark.parametrize('dtype', [torch.half, torch.float, torch.double])
-def test_stack_grouping_points(dtype):
+def test_stack_grouping_points(dtype, device):
+    if device == 'npu' and dtype == torch.double:
+        return
     idx = torch.tensor([[0, 0, 0], [3, 3, 3], [8, 8, 8], [1, 1, 1], [0, 0, 0],
                         [2, 2, 2], [0, 0, 0], [6, 6, 6], [9, 9, 9], [0, 0, 0],
                         [1, 1, 1], [0, 0, 0]]).int().cuda()
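The new marks depend on availability flags that this hunk does not show being imported; mmcv exposes them from mmcv.utils. The float64 case is skipped on 'npu', presumably because the aclnn kernel does not support that dtype. A self-contained sketch of the reusable pattern (the test name and tensor setup below are hypothetical, not part of this commit):

import pytest
import torch

# Availability flags backing the skipif marks in the diff above.
from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE


@pytest.mark.parametrize('device', [
    pytest.param(
        'cuda',
        marks=pytest.mark.skipif(
            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
    pytest.param(
        'npu',
        marks=pytest.mark.skipif(
            not IS_NPU_AVAILABLE, reason='requires NPU support'))
])
def test_device_smoke(device):
    # Building inputs with .to(device) keeps one test body for both backends.
    x = torch.ones(4, 3).to(device)
    assert x.device.type == device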
