Revert "Revert "Register Torchvision Ops as Custom Ops (#1267)" (#1316)"

fmassa · web-flow · commit cfb8557a57ca · 2019-09-09T15:34:58.000+02:00
This reverts commit fe234fc.
diff --git a/.travis.yml b/.travis.yml
@@ -47,6 +47,10 @@ before_install:
   - pip install future
   - pip install pytest pytest-cov codecov
   - pip install mock
+  - |
+    if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then
+      pip install onnxruntime
+    fi
   - conda install av -c conda-forge
 
 
diff --git a/setup.py b/setup.py
@@ -96,12 +96,21 @@ def get_extensions():
     source_models = [os.path.join(models_dir, s) for s in source_models]
     tests = test_file + source_models
 
+    custom_ops_sources = [os.path.join(extensions_dir, "custom_ops", "custom_ops.cpp"),
+                          os.path.join(extensions_dir, "cpu", "nms_cpu.cpp"),
+                          os.path.join(extensions_dir, "cpu", "ROIAlign_cpu.cpp"),
+                          os.path.join(extensions_dir, "cpu", "ROIPool_cpu.cpp")]
+    custom_ops_sources_cuda = [os.path.join(extensions_dir, "cuda", "nms_cuda.cu"),
+                               os.path.join(extensions_dir, "cuda", "ROIAlign_cuda.cu"),
+                               os.path.join(extensions_dir, "cuda", "ROIPool_cuda.cu")]
+
     define_macros = []
 
     extra_compile_args = {}
     if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1':
         extension = CUDAExtension
         sources += source_cuda
+        custom_ops_sources += custom_ops_sources_cuda
         define_macros += [('WITH_CUDA', None)]
         nvcc_flags = os.getenv('NVCC_FLAGS', '')
         if nvcc_flags == '':
@@ -138,7 +147,14 @@ def get_extensions():
             include_dirs=tests_include_dirs,
             define_macros=define_macros,
             extra_compile_args=extra_compile_args,
-        )
+        ),
+        extension(
+            "torchvision._custom_ops",
+            sources=custom_ops_sources,
+            include_dirs=include_dirs,
+            define_macros=define_macros,
+            extra_compile_args=extra_compile_args,
+        ),
     ]
 
     return ext_modules
@@ -179,5 +195,6 @@ def run(self):
         "scipy": ["scipy"],
     },
     ext_modules=get_extensions(),
-    cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension, 'clean': clean}
+    cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension,
+              'clean': clean}
 )
diff --git a/test/test_onnx.py b/test/test_onnx.py
@@ -0,0 +1,88 @@
+import io
+import torch
+from torchvision import ops
+
+# onnxruntime requires python 3.5 or above
+try:
+    import onnxruntime
+except ImportError:
+    onnxruntime = None
+
+import unittest
+
+
+@unittest.skipIf(onnxruntime is None, 'ONNX Runtime unavailable')
+class ONNXExporterTester(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        torch.manual_seed(123)
+
+    def run_model(self, model, inputs):
+        model.eval()
+
+        # run pytorch model
+        with torch.no_grad():
+            if isinstance(inputs, torch.Tensor):
+                inputs = (inputs,)
+            outputs = model(*inputs)
+            if isinstance(outputs, torch.Tensor):
+                outputs = (outputs,)
+
+        onnx_io = io.BytesIO()
+        # export to onnx
+        torch.onnx.export(model, inputs, onnx_io, do_constant_folding=True, opset_version=10)
+
+        # validate the exported model with onnx runtime
+        self.ort_validate(onnx_io, inputs, outputs)
+
+    def ort_validate(self, onnx_io, inputs, outputs):
+
+        inputs, _ = torch.jit._flatten(inputs)
+        outputs, _ = torch.jit._flatten(outputs)
+
+        def to_numpy(tensor):
+            if tensor.requires_grad:
+                return tensor.detach().cpu().numpy()
+            else:
+                return tensor.cpu().numpy()
+
+        inputs = list(map(to_numpy, inputs))
+        outputs = list(map(to_numpy, outputs))
+
+        ort_session = onnxruntime.InferenceSession(onnx_io.getvalue())
+        # compute onnxruntime output prediction
+        ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs))
+        ort_outs = ort_session.run(None, ort_inputs)
+
+        for i in range(0, len(outputs)):
+            torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05)
+
+    def test_nms(self):
+        boxes = torch.rand(5, 4)
+        boxes[:, 2:] += torch.rand(5, 2)
+        scores = torch.randn(5)
+
+        class Module(torch.nn.Module):
+            def forward(self, boxes, scores):
+                return ops.nms(boxes, scores, 0.5)
+
+        self.run_model(Module(), (boxes, scores))
+
+    def test_roi_align(self):
+        """Check the ONNX export of ops.RoIAlign against the PyTorch output."""
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1, 2)
+        self.run_model(model, (x, single_roi))
+
+    def test_roi_pool(self):
+        """Check the ONNX export of ops.RoIPool against the PyTorch output."""
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        pool_h, pool_w = 5, 5
+        model = ops.RoIPool((pool_h, pool_w), 2)
+        self.run_model(model, (x, rois))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/torchvision/csrc/ROIAlign.h b/torchvision/csrc/ROIAlign.h
@@ -10,11 +10,11 @@
 at::Tensor ROIAlign_forward(
     const at::Tensor& input, // Input feature map.
     const at::Tensor& rois, // List of ROIs to pool over.
-    const float spatial_scale, // The scale of the image features. ROIs will be
+    const double spatial_scale, // The scale of the image features. ROIs will be
     // scaled to this.
-    const int pooled_height, // The height of the pooled feature map.
-    const int pooled_width, // The width of the pooled feature
-    const int sampling_ratio) // The number of points to sample in each bin
+    const int64_t pooled_height, // The height of the pooled feature map.
+    const int64_t pooled_width, // The width of the pooled feature
+    const int64_t sampling_ratio) // The number of points to sample in each bin
 // along each axis.
 {
   if (input.type().is_cuda()) {
diff --git a/torchvision/csrc/ROIPool.h b/torchvision/csrc/ROIPool.h
@@ -9,9 +9,9 @@
 std::tuple<at::Tensor, at::Tensor> ROIPool_forward(
     const at::Tensor& input,
     const at::Tensor& rois,
-    const float spatial_scale,
-    const int pooled_height,
-    const int pooled_width) {
+    const double spatial_scale,
+    const int64_t pooled_height,
+    const int64_t pooled_width) {
   if (input.type().is_cuda()) {
 #ifdef WITH_CUDA
     return ROIPool_forward_cuda(
diff --git a/torchvision/csrc/custom_ops/custom_ops.cpp b/torchvision/csrc/custom_ops/custom_ops.cpp
@@ -0,0 +1,14 @@
+#include <torch/script.h>
+
+#include "ROIAlign.h"
+#include "ROIPool.h"
+#include "nms.h"
+
+using namespace at;
+
+static auto registry =
+    torch::RegisterOperators()
+        .op("torchvision::nms", &nms)
+        .op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio) -> Tensor",
+            &ROIAlign_forward)
+        .op("torchvision::roi_pool", &ROIPool_forward);
diff --git a/torchvision/csrc/nms.h b/torchvision/csrc/nms.h
@@ -8,7 +8,7 @@
 at::Tensor nms(
     const at::Tensor& dets,
     const at::Tensor& scores,
-    const float iou_threshold) {
+    const double iou_threshold) {
   if (dets.device().is_cuda()) {
 #ifdef WITH_CUDA
     if (dets.numel() == 0) {
diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp
@@ -7,6 +7,8 @@
 #endif
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  // TODO: remove nms from here since it is now registered
+  //       and used as a PyTorch custom op
   m.def("nms", &nms, "non-maximum suppression");
   m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
   m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
diff --git a/torchvision/ops/_custom_ops.py b/torchvision/ops/_custom_ops.py
@@ -0,0 +1,46 @@
+import os
+import sys
+import imp
+import torch
+
+
+lib_dir = os.path.join(os.path.dirname(__file__), '..')
+file, path, description = imp.find_module("_custom_ops", [lib_dir])
+file.close()  # find_module returns an open handle; only the path is needed
+torch.ops.load_library(path)  # loading the library registers the custom ops
+
+
+def register_custom_op():
+    from torch.onnx.symbolic_helper import parse_args, scalar_type_to_onnx
+    from torch.onnx.symbolic_opset9 import select, unsqueeze, squeeze, _cast_Long, reshape
+
+    @parse_args('v', 'v', 'f')
+    def symbolic_multi_label_nms(g, boxes, scores, iou_threshold):
+        boxes = unsqueeze(g, boxes, 0)
+        scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
+        max_output_per_class = g.op('Constant', value_t=torch.tensor([sys.maxsize], dtype=torch.long))
+        iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float))
+        nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold)
+        return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1)
+
+    @parse_args('v', 'v', 'f', 'i', 'i', 'i')
+    def roi_align(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio):
+        batch_indices = _cast_Long(g, squeeze(g, select(g, rois, 1, g.op('Constant',
+                                   value_t=torch.tensor([0], dtype=torch.long))), 1), False)
+        rois = select(g, rois, 1, g.op('Constant', value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long)))
+        return g.op('RoiAlign', input, rois, batch_indices, spatial_scale_f=spatial_scale,
+                    output_height_i=pooled_height, output_width_i=pooled_width, sampling_ratio_i=sampling_ratio)
+
+    @parse_args('v', 'v', 'f', 'i', 'i')
+    def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width):
+        roi_pool = g.op('MaxRoiPool', input, rois,
+                        pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale)
+        return roi_pool, None
+
+    from torch.onnx import register_custom_op_symbolic
+    register_custom_op_symbolic('torchvision::nms', symbolic_multi_label_nms, 10)
+    register_custom_op_symbolic('torchvision::roi_align', roi_align, 10)
+    register_custom_op_symbolic('torchvision::roi_pool', roi_pool, 10)
+
+
+register_custom_op()
diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py
@@ -1,5 +1,5 @@
 import torch
-from torchvision.extension import _lazy_import
+import torchvision.ops._custom_ops
 
 
 def nms(boxes, scores, iou_threshold):
@@ -29,8 +29,7 @@ def nms(boxes, scores, iou_threshold):
         of the elements that have been kept
         by NMS, sorted in decreasing order of scores
     """
-    _C = _lazy_import()
-    return _C.nms(boxes, scores, iou_threshold)
+    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
 
 
 def batched_nms(boxes, scores, idxs, iou_threshold):
diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py
@@ -9,6 +9,8 @@
 from torchvision.extension import _lazy_import
 from ._utils import convert_boxes_to_roi_format
 
+import torchvision.ops._custom_ops
+
 
 class _RoIAlignFunction(Function):
     @staticmethod
@@ -66,6 +68,12 @@ def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
     rois = boxes
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
+    # TODO: Change this to support backwards, which we
+    #       do not currently support when JIT tracing.
+    if torch._C._get_tracing_state():
+        return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
+                                               output_size[0], output_size[1],
+                                               sampling_ratio)
     return _RoIAlignFunction.apply(input, rois, output_size, spatial_scale, sampling_ratio)
 
 
diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py
@@ -9,6 +9,8 @@
 from torchvision.extension import _lazy_import
 from ._utils import convert_boxes_to_roi_format
 
+import torchvision.ops._custom_ops
+
 
 class _RoIPoolFunction(Function):
     @staticmethod
@@ -59,6 +61,12 @@ def roi_pool(input, boxes, output_size, spatial_scale=1.0):
     rois = boxes
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
+    # TODO: Change this to support backwards, which we
+    #       do not currently support when JIT tracing.
+    if torch._C._get_tracing_state():
+        output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
+                                                   output_size[0], output_size[1])
+        return output
     return _RoIPoolFunction.apply(input, rois, output_size, spatial_scale)