@@ -1,15 +1,14 @@
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include <TH/TH.h>
#include <algorithm>
#include "ps_roi_pool_kernel.h"

namespace {

template <class T>
inline void add(T* address, const T& val) {
*address += val;
}

template <typename T>
void PSROIPoolForward(
void ps_roi_pool_forward_kernel_impl(
const T* input,
const T spatial_scale,
int channels,
@@ -79,7 +78,7 @@ void PSROIPoolForward(
}

template <typename T>
void PSROIPoolBackward(
void ps_roi_pool_backward_kernel_impl(
const T* grad_output,
const int* channel_mapping,
int num_rois,
@@ -143,7 +142,9 @@ void PSROIPoolBackward(
}
}

std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cpu(
} // namespace

std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
@@ -157,7 +158,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cpu(

at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};

at::CheckedFrom c = "PSROIPool_forward_cpu";
at::CheckedFrom c = "ps_roi_pool_forward_cpu";
at::checkAllSameType(c, {input_t, rois_t});

int num_rois = rois.size(0);
@@ -182,8 +183,8 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cpu(

auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "PSROIPool_forward", [&] {
PSROIPoolForward<scalar_t>(
input.scalar_type(), "ps_roi_pool_forward", [&] {
ps_roi_pool_forward_kernel_impl<scalar_t>(
input_.data_ptr<scalar_t>(),
spatial_scale,
channels,
@@ -200,7 +201,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cpu(
return std::make_tuple(output, channel_mapping);
}

at::Tensor PSROIPool_backward_cpu(
at::Tensor ps_roi_pool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
@@ -221,7 +222,7 @@ at::Tensor PSROIPool_backward_cpu(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
channel_mapping_t{channel_mapping, "channel_mapping", 3};

at::CheckedFrom c = "PSROIPool_backward_cpu";
at::CheckedFrom c = "ps_roi_pool_backward_cpu";
at::checkAllSameType(c, {grad_t, rois_t});

auto num_rois = rois.size(0);
@@ -237,8 +238,8 @@ at::Tensor PSROIPool_backward_cpu(

auto grad_ = grad.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "PSROIPool_backward", [&] {
PSROIPoolBackward<scalar_t>(
grad.scalar_type(), "ps_roi_pool_backward", [&] {
ps_roi_pool_backward_kernel_impl<scalar_t>(
grad_.data_ptr<scalar_t>(),
channel_mapping.data_ptr<int>(),
num_rois,
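
Note: the AT_DISPATCH_FLOATING_TYPES_AND_HALF calls in this file switch on the tensor's runtime dtype and instantiate the templated kernel with the matching C++ scalar type. A minimal self-contained sketch of the same pattern, with a hypothetical scale_kernel standing in for the ps_roi_pool kernels:

#include <ATen/ATen.h>
#include <ATen/Dispatch.h>

// Hypothetical templated kernel, analogous to ps_roi_pool_forward_kernel_impl.
template <typename T>
void scale_kernel(const T* in, T* out, int64_t n, T factor) {
  for (int64_t i = 0; i < n; ++i)
    out[i] = in[i] * factor;
}

void scale(const at::Tensor& input, at::Tensor& output, double factor) {
  // The macro expands to a switch over Float/Double/Half; inside the lambda,
  // `scalar_t` is bound to the C++ type matching input.scalar_type().
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "scale", [&] {
        scale_kernel<scalar_t>(
            input.data_ptr<scalar_t>(),
            output.data_ptr<scalar_t>(),
            input.numel(),
            static_cast<scalar_t>(factor));
      });
}
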
23 changes: 23 additions & 0 deletions torchvision/csrc/cpu/ps_roi_pool_kernel.h
@@ -0,0 +1,23 @@
#pragma once

#include <ATen/ATen.h>
#include "../macros.h"

VISION_API std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

VISION_API at::Tensor ps_roi_pool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
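
Note: for orientation, here is a minimal sketch (not part of this PR) of calling the new CPU entry points directly. The one structural requirement of PS ROI Pool is that the input channel count be divisible by pooled_height * pooled_width, since the pooling bins are spread across channel groups:

#include <tuple>
#include <ATen/ATen.h>
#include "ps_roi_pool_kernel.h"

int main() {
  // 16 channels = (pooled_height=2) * (pooled_width=2) * 4 output channels.
  auto input = at::rand({1, 16, 32, 32});
  // One RoI per row: (batch_index, x1, y1, x2, y2), same dtype as input.
  float rois_data[5] = {0.f, 0.f, 0.f, 16.f, 16.f};
  auto rois = at::from_blob(rois_data, {1, 5}).clone();
  at::Tensor output, channel_mapping;
  std::tie(output, channel_mapping) = ps_roi_pool_forward_cpu(
      input, rois, /*spatial_scale=*/1.0,
      /*pooled_height=*/2, /*pooled_width=*/2);
  // output: (num_rois, 4, 2, 2); channel_mapping: same shape, int32,
  // recording which input channel each output element was pooled from.
  return 0;
}
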
19 changes: 0 additions & 19 deletions torchvision/csrc/cpu/vision_cpu.h
@@ -4,25 +4,6 @@

// TODO: Delete this file once all the methods are gone

VISION_API std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

VISION_API at::Tensor PSROIPool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);

VISION_API at::Tensor ROIAlign_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
@@ -1,13 +1,14 @@
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <THC/THCAtomics.cuh>

#include "cuda_helpers.h"
#include "ps_roi_pool_kernel.h"

namespace {

template <typename T>
__global__ void PSROIPoolForward(
__global__ void ps_roi_pool_forward_kernel_impl(
int nthreads,
const T* input,
const T spatial_scale,
@@ -73,7 +74,7 @@ __global__ void PSROIPoolForward(
}

template <typename T>
__global__ void PSROIPoolBackward(
__global__ void ps_roi_pool_backward_kernel_impl(
int nthreads,
const T* grad_output,
const int* channel_mapping,
@@ -132,7 +133,9 @@ __global__ void PSROIPoolBackward(
}
}

std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cuda(
} // namespace

std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
@@ -146,7 +149,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cuda(

at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};

at::CheckedFrom c = "PSROIPool_forward_cuda";
at::CheckedFrom c = "ps_roi_pool_forward_cuda";
at::checkAllSameGPU(c, {input_t, rois_t});
at::checkAllSameType(c, {input_t, rois_t});

@@ -183,8 +186,8 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cuda(
auto input_ = input.contiguous(),
rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "PSROIPool_forward", [&] {
PSROIPoolForward<scalar_t><<<grid, block, 0, stream>>>(
input.scalar_type(), "ps_roi_pool_forward", [&] {
ps_roi_pool_forward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>(
output_size,
input_.data_ptr<scalar_t>(),
spatial_scale,
@@ -202,7 +205,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cuda(
return std::make_tuple(output, channel_mapping);
}

at::Tensor PSROIPool_backward_cuda(
at::Tensor ps_roi_pool_backward_cuda(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
@@ -223,7 +226,7 @@ at::Tensor PSROIPool_backward_cuda(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
channel_mapping_t{channel_mapping, "channel_mapping", 3};

at::CheckedFrom c = "PSROIPool_backward_cuda";
at::CheckedFrom c = "ps_roi_pool_backward_cuda";
at::checkAllSameGPU(c, {grad_t, rois_t, channel_mapping_t});
at::checkAllSameType(c, {grad_t, rois_t});

@@ -251,8 +254,8 @@ at::Tensor PSROIPool_backward_cuda(
auto grad_ = grad.contiguous(),
rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "PSROIPool_backward", [&] {
PSROIPoolBackward<scalar_t><<<grid, block, 0, stream>>>(
grad.scalar_type(), "ps_roi_pool_backward", [&] {
ps_roi_pool_backward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>(
grad.numel(),
grad_.data_ptr<scalar_t>(),
channel_mapping.data_ptr<int>(),
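
Note: the <<<grid, block, 0, stream>>> launches above follow torchvision's usual one-thread-per-output-element convention. A sketch of the launch configuration (CUDA host code; the 512-thread block and 4096-block grid cap match what the cuda_helpers-based kernels in the tree typically use, but are an assumption as far as this diff shows):

#include <algorithm>
#include <cstdint>
#include <cuda_runtime.h>

constexpr int64_t kThreadsPerBlock = 512;

inline dim3 launch_grid(int64_t output_size) {
  // Enough blocks to cover every output element, capped so the grid stays
  // modest; the kernels then cover the remainder with a grid-stride loop.
  int64_t blocks = std::min(
      (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
      static_cast<int64_t>(4096));
  return dim3(static_cast<unsigned int>(blocks));
}
// At the call site: kernel<<<launch_grid(n), kThreadsPerBlock, 0, stream>>>(...);
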
23 changes: 23 additions & 0 deletions torchvision/csrc/cuda/ps_roi_pool_kernel.h
@@ -0,0 +1,23 @@
#pragma once

#include <ATen/ATen.h>
#include "../macros.h"

VISION_API std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

VISION_API at::Tensor ps_roi_pool_backward_cuda(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
19 changes: 0 additions & 19 deletions torchvision/csrc/cuda/vision_cuda.h
@@ -4,25 +4,6 @@

// TODO: Delete this file once all the methods are gone

VISION_API std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

VISION_API at::Tensor PSROIPool_backward_cuda(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);

VISION_API at::Tensor ROIAlign_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
26 changes: 11 additions & 15 deletions torchvision/csrc/PSROIPool.h → torchvision/csrc/ps_roi_pool.cpp
@@ -1,18 +1,10 @@
#pragma once
#include "ps_roi_pool.h"
#include <torch/extension.h>

#include "cpu/vision_cpu.h"

#ifdef WITH_CUDA
#include "autocast.h"
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "autocast.h"
#include "hip/vision_cuda.h"
#if defined(WITH_CUDA) || defined(WITH_HIP)
#include <ATen/autocast_mode.h>
#endif

// TODO: put this stuff in torchvision namespace

std::tuple<at::Tensor, at::Tensor> ps_roi_pool(
const at::Tensor& input,
const at::Tensor& rois,
@@ -26,7 +18,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool(
}

#if defined(WITH_CUDA) || defined(WITH_HIP)
std::tuple<at::Tensor, at::Tensor> PSROIPool_autocast(
std::tuple<at::Tensor, at::Tensor> ps_roi_pool_autocast(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
@@ -74,6 +66,8 @@ at::Tensor _ps_roi_pool_backward(
width);
}

namespace {

class PSROIPoolFunction : public torch::autograd::Function<PSROIPoolFunction> {
public:
static torch::autograd::variable_list forward(
@@ -166,7 +160,9 @@ class PSROIPoolBackwardFunction
}
};

std::tuple<at::Tensor, at::Tensor> PSROIPool_autograd(
} // namespace

std::tuple<at::Tensor, at::Tensor> ps_roi_pool_autograd(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
@@ -178,7 +174,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_autograd(
return std::make_tuple(result[0], result[1]);
}

at::Tensor PSROIPool_backward_autograd(
at::Tensor ps_roi_pool_backward_autograd(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
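
Note: the body of ps_roi_pool_autocast is collapsed in this diff; this PR only renames it. For orientation, torchvision's autocast wrappers conventionally exclude the Autocast dispatch key and re-dispatch at float precision, along these lines (a sketch, not the verbatim body):

#if defined(WITH_CUDA) || defined(WITH_HIP)
std::tuple<at::Tensor, at::Tensor> ps_roi_pool_autocast(
    const at::Tensor& input,
    const at::Tensor& rois,
    double spatial_scale,
    int64_t pooled_height,
    int64_t pooled_width) {
  c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
  // Re-dispatch with fp32 inputs, then cast the pooled output back to the
  // caller's dtype; the integer channel_mapping passes through unchanged.
  auto result = ps_roi_pool(
      at::autocast::cached_cast(at::kFloat, input),
      at::autocast::cached_cast(at::kFloat, rois),
      spatial_scale,
      pooled_height,
      pooled_width);
  return std::make_tuple(
      std::get<0>(result).to(input.scalar_type()),
      std::get<1>(result));
}
#endif
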
61 changes: 61 additions & 0 deletions torchvision/csrc/ps_roi_pool.h
@@ -0,0 +1,61 @@
#pragma once

#include "cpu/ps_roi_pool_kernel.h"

#ifdef WITH_CUDA
#include "cuda/ps_roi_pool_kernel.h"
#endif
#ifdef WITH_HIP
#include "hip/ps_roi_pool_kernel.h"
#endif

// C++ Forward
std::tuple<at::Tensor, at::Tensor> ps_roi_pool(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

// Autocast Forward
#if defined(WITH_CUDA) || defined(WITH_HIP)
std::tuple<at::Tensor, at::Tensor> ps_roi_pool_autocast(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
#endif

// C++ Backward
at::Tensor _ps_roi_pool_backward(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);

// Autograd Forward and Backward
std::tuple<at::Tensor, at::Tensor> ps_roi_pool_autograd(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

at::Tensor ps_roi_pool_backward_autograd(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
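
Note: the new names get hooked into the dispatcher elsewhere (vision.cpp), which this diff doesn't show. Schematically, and assuming the operator schema stays registered as "torchvision::ps_roi_pool", the wiring looks like this sketch:

#include <torch/library.h>
#include "ps_roi_pool.h"

// Per-backend kernel registration; each dispatch key gets the matching
// function declared above (names of the schemas are assumptions here).
TORCH_LIBRARY_IMPL(torchvision, CPU, m) {
  m.impl("ps_roi_pool", ps_roi_pool_forward_cpu);
  m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_cpu);
}
#if defined(WITH_CUDA) || defined(WITH_HIP)
TORCH_LIBRARY_IMPL(torchvision, CUDA, m) {
  m.impl("ps_roi_pool", ps_roi_pool_forward_cuda);
  m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_cuda);
}
TORCH_LIBRARY_IMPL(torchvision, Autocast, m) {
  m.impl("ps_roi_pool", ps_roi_pool_autocast);
}
#endif
TORCH_LIBRARY_IMPL(torchvision, Autograd, m) {
  m.impl("ps_roi_pool", ps_roi_pool_autograd);
  m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_autograd);
}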