add npu adapter:

momo609 · momo609 · commit 9f736706ab9a · 2023-10-11T10:13:16.000+08:00
diff --git a/docs/zh_cn/understand_mmcv/ops.md b/docs/zh_cn/understand_mmcv/ops.md
@@ -26,7 +26,7 @@ MMCV 提供了检测、分割等任务中常用的算子
 | FurthestPointSampleWithDist  |     | √    |     |     |        |
 | FusedBiasLeakyrelu           |     | √    |     |     | √      |
 | GatherPoints                 |     | √    |     |     | √      |
-| GroupPoints                  |     | √    |     |     |        |
+| GroupPoints                  |     | √    |     |     | √      |
 | Iou3d                        |     | √    | √   |     |        |
 | KNN                          |     | √    |     |     |        |
 | MaskedConv                   |     | √    | √   |     | √      |
@@ -44,7 +44,7 @@ MMCV 提供了检测、分割等任务中常用的算子
 | RotatedFeatureAlign          | √   | √    | √   |     |        |
 | RoIPointPool3d               |     | √    | √   |     |        |
 | RoIPool                      |     | √    | √   |     | √      |
-| RoIAlignRotated              | √   | √    | √   |     |        |
+| RoIAlignRotated              | √   | √    | √   |     | √      |
 | RiRoIAlignRotated            |     | √    |     |     |        |
 | RoIAlign                     | √   | √    | √   |     | √      |
 | RoIAwarePool3d               |     | √    | √   |     |        |
diff --git a/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp b/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp
@@ -0,0 +1,64 @@
+#include "pytorch_npu_helper.hpp"
+
+using namespace NPU_NAME_SPACE;
+using namespace std;
+
+void roi_align_rotated_forward_npu(Tensor input, Tensor rois, Tensor output,
+                                    int aligned_height, int aligned_width,
+                                    float spatial_scale, int sampling_ratio,
+                                    bool aligned, bool clockwise) {
+  int64_t aligned_height_64 = aligned_height;
+  int64_t aligned_width_64 = aligned_width;
+  int64_t sampling_ratio_64 = sampling_ratio;
+  OpCommand cmd;
+  cmd.Name("RoiAlignRotated")
+      .Input(input)
+      .Input(rois)
+      .Output(output)
+      .Attr("pooled_h", aligned_height_64)
+      .Attr("pooled_w", aligned_width_64)
+      .Attr("spatial_scale", spatial_scale)
+      .Attr("sampling_ratio", sampling_ratio_64)
+      .Attr("aligned", aligned)
+      .Attr("clockwise", clockwise)
+      .Run();
+}
+
+void roi_align_rotated_backward_npu(Tensor top_grad, Tensor rois,
+                                     Tensor bottom_grad, int aligned_height,
+                                     int aligned_width, float spatial_scale,
+                                     int sampling_ratio, bool aligned,
+                                     bool clockwise) {
+  int64_t aligned_height_64 = aligned_height;
+  int64_t aligned_width_64 = aligned_width;
+  int64_t sampling_ratio_64 = sampling_ratio;
+  c10::SmallVector<int64_t, SIZE> y_grad_shape =
+      array_to_small_vector(bottom_grad.sizes());
+  OpCommand cmd;
+  cmd.Name("RoiAlignRotatedGrad")
+      .Input(top_grad)
+      .Input(rois)
+      .Output(bottom_grad)
+      .Attr("y_grad_shape", y_grad_shape)
+      .Attr("pooled_h", aligned_width_64)
+      .Attr("pooled_w", aligned_height_64)
+      .Attr("spatial_scale", spatial_scale)
+      .Attr("sampling_ratio", sampling_ratio_64)
+      .Attr("aligned", aligned)
+      .Attr("clockwise", clockwise)
+      .Run();
+}
+
+void roi_align_rotated_forward_impl(Tensor input, Tensor rois, Tensor output,
+                                    int aligned_height, int aligned_width,
+                                    float spatial_scale, int sampling_ratio,
+                                    bool aligned, bool clockwise);
+
+void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
+                                     Tensor bottom_grad, int aligned_height,
+                                     int aligned_width, float spatial_scale,
+                                     int sampling_ratio, bool aligned,
+                                     bool clockwise);
+
+REGISTER_NPU_IMPL(roi_align_rotated_forward_impl, roi_align_rotated_forward_npu);
+REGISTER_NPU_IMPL(roi_align_rotated_backward_impl, roi_align_rotated_backward_npu);
diff --git a/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp b/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp
@@ -50,23 +50,29 @@ void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax,
   int64_t pooled_height_64 = pooled_height;
   int64_t pooled_width_64 = pooled_width;
   int64_t pooled_channel = 1;
+  at::Tensor argmax_trans = argmax.transpose(1, 2).transpose(2, 3);
+  at::Tensor grad_output_trans = grad_output.transpose(1, 2).transpose(2, 3);
   at::Tensor roi_actual_num =
       at::empty_like(rois, rois.options().dtype(at::kInt));
-  at::Tensor x = at::ones_like(grad_input);
+  at::Tensor x = at::ones_like(grad_input).transpose(1, 2).transpose(2, 3);
+  at::Tensor y = at::zeros_like(x);
   OpCommand cmd;
   cmd.Name("RoiPoolingGradWithArgMax")
-      .Input(grad_output)
+      .Input(grad_output_trans)
       .Input(x)
       .Input(rois)
       .Input(roi_actual_num)
-      .Input(argmax)
-      .Output(grad_input)
+      .Input(argmax_trans)
+      .Output(y)
       .Attr("pooled_h", pooled_height_64)
       .Attr("pooled_w", pooled_width_64)
       .Attr("spatial_scale_h", spatial_scale)
       .Attr("spatial_scale_w", spatial_scale)
       .Attr("pool_channel", pooled_channel)
       .Run();
+  at::Tensor result = y.transpose(2, 3).transpose(1, 2);
+  at::Tensor res = NpuUtils::format_contiguous(result);
+  grad_input.copy_(res);
 }
 
 void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,