Skip to content

Commit 896d7ec

Browse files
LukasBommes authored and fmassa committed
Implementation for Position-sensitive ROI Pool/Align [updated] (#1410)
* added PSRoiAlign and PSRoiPool with C++ autograd and torch ops * fixed linter errors * fixed linter errors 2 * fixed linter errors 3
1 parent 63128cb commit 896d7ec

File tree

13 files changed

+2526
-0
lines changed

13 files changed

+2526
-0
lines changed

test/test_ops.py

Lines changed: 679 additions & 0 deletions
Large diffs are not rendered by default.

torchvision/csrc/PSROIAlign.h

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
#pragma once
2+
3+
#include "cpu/vision_cpu.h"
4+
5+
#ifdef WITH_CUDA
6+
#include "cuda/vision_cuda.h"
7+
#endif
8+
9+
#include <iostream>
10+
11+
// Dispatches the position-sensitive ROI Align forward pass to the CUDA or
// CPU kernel depending on the device of `input`.
//
// @param input          feature map the ROIs are pooled from
// @param rois           regions of interest passed through to the kernel
// @param spatial_scale  scale mapping ROI coordinates onto `input`
// @param pooled_height  output bin rows
// @param pooled_width   output bin columns
// @param sampling_ratio bilinear sampling points per bin (kernel-defined)
// @return {pooled output, channel mapping consumed by the backward pass}
std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward(
    const at::Tensor& input,
    const at::Tensor& rois,
    const float spatial_scale,
    const int pooled_height,
    const int pooled_width,
    const int sampling_ratio) {
  // Tensor::type() is deprecated; query the device flag directly.
  if (input.is_cuda()) {
#ifdef WITH_CUDA
    return PSROIAlign_forward_cuda(
        input,
        rois,
        spatial_scale,
        pooled_height,
        pooled_width,
        sampling_ratio);
#else
    // A CUDA tensor reached a binary built without CUDA support.
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  return PSROIAlign_forward_cpu(
      input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
}
34+
35+
// Dispatches the position-sensitive ROI Align backward pass to the CUDA or
// CPU kernel depending on the device of `grad`.
//
// @param grad            gradient w.r.t. the forward output
// @param rois            regions of interest used in the forward pass
// @param mapping_channel channel mapping produced by the forward pass
// @param spatial_scale, pooled_height, pooled_width, sampling_ratio
//                        must match the forward call
// @param batch_size, channels, height, width
//                        shape of the forward `input`, used to size the result
// @return gradient w.r.t. the forward `input`
at::Tensor PSROIAlign_backward(
    const at::Tensor& grad,
    const at::Tensor& rois,
    const at::Tensor& mapping_channel,
    const float spatial_scale,
    const int pooled_height,
    const int pooled_width,
    const int sampling_ratio,
    const int batch_size,
    const int channels,
    const int height,
    const int width) {
  // Tensor::type() is deprecated; query the device flag directly.
  if (grad.is_cuda()) {
#ifdef WITH_CUDA
    return PSROIAlign_backward_cuda(
        grad,
        rois,
        mapping_channel,
        spatial_scale,
        pooled_height,
        pooled_width,
        sampling_ratio,
        batch_size,
        channels,
        height,
        width);
#else
    // A CUDA tensor reached a binary built without CUDA support.
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  return PSROIAlign_backward_cpu(
      grad,
      rois,
      mapping_channel,
      spatial_scale,
      pooled_height,
      pooled_width,
      sampling_ratio,
      batch_size,
      channels,
      height,
      width);
}
78+
79+
using namespace at;
80+
using torch::Tensor;
81+
using torch::autograd::AutogradContext;
82+
using torch::autograd::Variable;
83+
using torch::autograd::variable_list;
84+
85+
// Autograd wrapper around the PSROIAlign forward/backward dispatchers.
// forward() stashes the pooling parameters and input shape so backward()
// can reconstruct the exact kernel call for the gradient.
class PSROIAlignFunction
    : public torch::autograd::Function<PSROIAlignFunction> {
 public:
  static variable_list forward(
      AutogradContext* ctx,
      Variable input,
      Variable rois,
      const double spatial_scale,
      const int64_t pooled_height,
      const int64_t pooled_width,
      const int64_t sampling_ratio) {
    // Record scalar parameters and the input shape for the backward pass.
    ctx->saved_data["spatial_scale"] = spatial_scale;
    ctx->saved_data["pooled_height"] = pooled_height;
    ctx->saved_data["pooled_width"] = pooled_width;
    ctx->saved_data["sampling_ratio"] = sampling_ratio;
    ctx->saved_data["input_shape"] = input.sizes();
    auto result = PSROIAlign_forward(
        input,
        rois,
        spatial_scale,
        pooled_height,
        pooled_width,
        sampling_ratio);
    auto output = std::get<0>(result);
    auto channel_mapping = std::get<1>(result);
    ctx->save_for_backward({rois, channel_mapping});
    // channel_mapping is an index tensor; no gradient flows through it.
    ctx->mark_non_differentiable({channel_mapping});
    return {output, channel_mapping};
  }

  static variable_list backward(
      AutogradContext* ctx,
      variable_list grad_output) {
    // Use data saved in forward
    auto saved = ctx->get_saved_variables();
    auto rois = saved[0];
    auto channel_mapping = saved[1];
    auto input_shape = ctx->saved_data["input_shape"].toIntList();
    auto grad_in = PSROIAlign_backward(
        grad_output[0],
        rois,
        channel_mapping,
        ctx->saved_data["spatial_scale"].toDouble(),
        ctx->saved_data["pooled_height"].toInt(),
        ctx->saved_data["pooled_width"].toInt(),
        ctx->saved_data["sampling_ratio"].toInt(),
        input_shape[0],
        input_shape[1],
        input_shape[2],
        input_shape[3]);
    // One slot per forward() argument; non-tensor args get empty Variables.
    return {
        grad_in, Variable(), Variable(), Variable(), Variable(), Variable()};
  }
};
139+
140+
// Public entry point for position-sensitive ROI Align. Runs the autograd
// function and unpacks its variable_list into a (pooled output,
// channel mapping) tuple.
std::tuple<Tensor, Tensor> ps_roi_align(
    const Tensor& input,
    const Tensor& rois,
    const double spatial_scale,
    const int64_t pooled_height,
    const int64_t pooled_width,
    const int64_t sampling_ratio) {
  variable_list outputs = PSROIAlignFunction::apply(
      input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
  return std::make_tuple(outputs[0], outputs[1]);
}

torchvision/csrc/PSROIPool.h

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
#pragma once
2+
3+
#include "cpu/vision_cpu.h"
4+
5+
#ifdef WITH_CUDA
6+
#include "cuda/vision_cuda.h"
7+
#endif
8+
9+
// Dispatches the position-sensitive ROI Pool forward pass to the CUDA or
// CPU kernel depending on the device of `input`.
//
// @param input          feature map the ROIs are pooled from
// @param rois           regions of interest passed through to the kernel
// @param spatial_scale  scale mapping ROI coordinates onto `input`
// @param pooled_height  output bin rows
// @param pooled_width   output bin columns
// @return {pooled output, channel mapping consumed by the backward pass}
std::tuple<at::Tensor, at::Tensor> PSROIPool_forward(
    const at::Tensor& input,
    const at::Tensor& rois,
    const float spatial_scale,
    const int pooled_height,
    const int pooled_width) {
  // Tensor::type() is deprecated; query the device flag directly.
  if (input.is_cuda()) {
#ifdef WITH_CUDA
    return PSROIPool_forward_cuda(
        input, rois, spatial_scale, pooled_height, pooled_width);
#else
    // A CUDA tensor reached a binary built without CUDA support.
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  return PSROIPool_forward_cpu(
      input, rois, spatial_scale, pooled_height, pooled_width);
}
26+
27+
// Dispatches the position-sensitive ROI Pool backward pass to the CUDA or
// CPU kernel depending on the device of `grad`.
//
// @param grad            gradient w.r.t. the forward output
// @param rois            regions of interest used in the forward pass
// @param mapping_channel channel mapping produced by the forward pass
// @param spatial_scale, pooled_height, pooled_width
//                        must match the forward call
// @param batch_size, channels, height, width
//                        shape of the forward `input`, used to size the result
// @return gradient w.r.t. the forward `input`
at::Tensor PSROIPool_backward(
    const at::Tensor& grad,
    const at::Tensor& rois,
    const at::Tensor& mapping_channel,
    const float spatial_scale,
    const int pooled_height,
    const int pooled_width,
    const int batch_size,
    const int channels,
    const int height,
    const int width) {
  // Tensor::type() is deprecated; query the device flag directly.
  if (grad.is_cuda()) {
#ifdef WITH_CUDA
    return PSROIPool_backward_cuda(
        grad,
        rois,
        mapping_channel,
        spatial_scale,
        pooled_height,
        pooled_width,
        batch_size,
        channels,
        height,
        width);
#else
    // A CUDA tensor reached a binary built without CUDA support.
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  return PSROIPool_backward_cpu(
      grad,
      rois,
      mapping_channel,
      spatial_scale,
      pooled_height,
      pooled_width,
      batch_size,
      channels,
      height,
      width);
}
67+
68+
using namespace at;
69+
using torch::Tensor;
70+
using torch::autograd::AutogradContext;
71+
using torch::autograd::Variable;
72+
using torch::autograd::variable_list;
73+
74+
// Autograd wrapper around the PSROIPool forward/backward dispatchers.
// forward() stashes the pooling parameters and input shape so backward()
// can reconstruct the exact kernel call for the gradient.
class PSROIPoolFunction : public torch::autograd::Function<PSROIPoolFunction> {
 public:
  static variable_list forward(
      AutogradContext* ctx,
      Variable input,
      Variable rois,
      const double spatial_scale,
      const int64_t pooled_height,
      const int64_t pooled_width) {
    // Record scalar parameters and the input shape for the backward pass.
    ctx->saved_data["spatial_scale"] = spatial_scale;
    ctx->saved_data["pooled_height"] = pooled_height;
    ctx->saved_data["pooled_width"] = pooled_width;
    ctx->saved_data["input_shape"] = input.sizes();
    auto result = PSROIPool_forward(
        input, rois, spatial_scale, pooled_height, pooled_width);
    auto output = std::get<0>(result);
    auto channel_mapping = std::get<1>(result);
    ctx->save_for_backward({rois, channel_mapping});
    // channel_mapping is an index tensor; no gradient flows through it.
    ctx->mark_non_differentiable({channel_mapping});
    return {output, channel_mapping};
  }

  static variable_list backward(
      AutogradContext* ctx,
      variable_list grad_output) {
    // Use data saved in forward
    auto saved = ctx->get_saved_variables();
    auto rois = saved[0];
    auto channel_mapping = saved[1];
    auto input_shape = ctx->saved_data["input_shape"].toIntList();
    auto grad_in = PSROIPool_backward(
        grad_output[0],
        rois,
        channel_mapping,
        ctx->saved_data["spatial_scale"].toDouble(),
        ctx->saved_data["pooled_height"].toInt(),
        ctx->saved_data["pooled_width"].toInt(),
        input_shape[0],
        input_shape[1],
        input_shape[2],
        input_shape[3]);
    // One slot per forward() argument; non-tensor args get empty Variables.
    return {grad_in, Variable(), Variable(), Variable(), Variable()};
  }
};
118+
119+
// Public entry point for position-sensitive ROI Pool. Runs the autograd
// function and unpacks its variable_list into a (pooled output,
// channel mapping) tuple.
std::tuple<Tensor, Tensor> ps_roi_pool(
    const Tensor& input,
    const Tensor& rois,
    const double spatial_scale,
    const int64_t pooled_height,
    const int64_t pooled_width) {
  variable_list outputs = PSROIPoolFunction::apply(
      input, rois, spatial_scale, pooled_height, pooled_width);
  return std::make_tuple(outputs[0], outputs[1]);
}

0 commit comments

Comments
 (0)