Skip to content

Commit

Permalink
npu version adaption (#3176)
Browse files: browse the repository at this point in the history
  • Loading branch information
hust17yixuan authored Sep 26, 2024
1 parent 3c9d327 commit bd0c65e
Show file tree
Hide file tree
Showing 8 changed files with 9 additions and 8 deletions.
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
bboxesFP32 = bboxesFP32.to(at::kFloat);
gtboxesFP32 = gtboxesFP32.to(at::kFloat);
}
c10::SmallVector<int64_t, SIZE> iousSize = {gtboxesFP32.size(0),
c10::SmallVector<int64_t, 8> iousSize = {gtboxesFP32.size(0),
bboxesFP32.size(0)};
if (aligned) {
iousSize = {gtboxesFP32.size(0), 1};
Expand Down
1 change: 1 addition & 0 deletions mmcv/ops/csrc/pytorch/npu/common_util.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#ifndef MMCV_OPS_CSRC_COMMON__UTIL_HPP_
#define MMCV_OPS_CSRC_COMMON__UTIL_HPP_

// Shared element-count constant for the NPU operator sources, where it is
// passed as the second template argument of c10::SmallVector<int64_t, SIZE>
// when building tensor shape vectors.
//
// Declared constexpr (not just const) so that it is guaranteed to be a
// compile-time constant usable as a template argument; at namespace scope it
// still has internal linkage, so including this header from several
// translation units remains safe.
constexpr int SIZE = 8;

#endif // MMCV_OPS_CSRC_COMMON__UTIL_HPP_
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Tensor fused_bias_leakyrelu_npu(const Tensor &input, const Tensor &bias,
if (grad == 0) {
auto input_size = input.sizes();
int input_length = input_size.size();
c10::SmallVector<int64_t, SIZE> input_size_tmp;
c10::SmallVector<int64_t, 8> input_size_tmp;
for (uint64_t i = 0; i < input_size.size(); i++) {
input_size_tmp.emplace_back(input_size[i]);
}
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/nms_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
at::Tensor max_outputsize_y =
at::empty({}, boxes.options().dtype(at::kInt)).fill_(boxes.size(0));

c10::SmallVector<int64_t, SIZE> outputsize = {boxes.size(0)};
c10::SmallVector<int64_t, 8> outputsize = {boxes.size(0)};
at::Tensor output =
at::empty(outputsize, boxes.options().dtype(at::kInt)).fill_(-1);
OpCommand cmd;
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
detsCast = detsCast.to(at::kFloat);
scoresCast = scoresCast.to(at::kFloat);
}
c10::SmallVector<int64_t, SIZE> selectedIndexSize = {dets.size(0)};
c10::SmallVector<int64_t, 8> selectedIndexSize = {dets.size(0)};

at::Tensor selectedBox = at::empty_like(dets);
at::Tensor selectedIndex =
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void roi_align_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax_y,
roi_end_mode = 0;
}
auto shape = grad_input.sizes();
c10::SmallVector<int64_t, SIZE> xdiff_shape;
c10::SmallVector<int64_t, 8> xdiff_shape;
for (uint64_t i = 0; i < shape.size(); i++) {
xdiff_shape.emplace_back(shape[i]);
}
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void roi_align_rotated_backward_npu(Tensor top_grad, Tensor rois,
int64_t aligned_height_64 = aligned_height;
int64_t aligned_width_64 = aligned_width;
int64_t sampling_ratio_64 = sampling_ratio;
c10::SmallVector<int64_t, SIZE> y_grad_shape;
c10::SmallVector<int64_t, 8> y_grad_shape;
auto shape = bottom_grad.sizes();
for (uint64_t i = 0; i < shape.size(); i++) {
y_grad_shape.emplace_back(shape[i]);
Expand Down
4 changes: 2 additions & 2 deletions mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ void roipoint_pool3d_forward_impl_npu(int batch_size, int pts_num,
Tensor pooled_empty_flag) {
auto points_trans = xyz.transpose(1, 2).contiguous();
auto point_features_trans = pts_feature.transpose(1, 2).contiguous();
c10::SmallVector<int64_t, SIZE> features_trans_size = {
c10::SmallVector<int64_t, 8> features_trans_size = {
xyz.size(0), boxes3d.size(1), xyz.size(2) + pts_feature.size(2),
sampled_pts_num};
at::Tensor pooled_features_trans =
at::empty(features_trans_size, xyz.options());
c10::SmallVector<int64_t, SIZE> empty_flag_size = {boxes3d.size(0),
c10::SmallVector<int64_t, 8> empty_flag_size = {boxes3d.size(0),
boxes3d.size(1)};
EXEC_NPU_CMD(aclnnRoipointPool3dForward, points_trans, point_features_trans,
boxes3d, sampled_pts_num, pooled_features_trans,
Expand Down

0 comments on commit bd0c65e

Please sign in to comment.