@@ -71,6 +71,12 @@ def test_autocast(self, x_dtype, rois_dtype):
         with torch.cuda.amp.autocast():
             self.test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)
 
+    @pytest.mark.parametrize('x_dtype', (torch.float, torch.half))
+    @pytest.mark.parametrize('rois_dtype', (torch.float, torch.half))
+    def test_autocast_cpu(self, x_dtype, rois_dtype):
+        with torch.cpu.amp.autocast():
+            self.test_forward(torch.device("cpu"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)
+
     def _helper_boxes_shape(self, func):
         # test boxes as Tensor[N, 5]
         with pytest.raises(AssertionError):
@@ -278,6 +284,14 @@ def test_autocast(self, aligned, x_dtype, rois_dtype):
             self.test_forward(torch.device("cuda"), contiguous=False, aligned=aligned, x_dtype=x_dtype,
                               rois_dtype=rois_dtype)
 
+    @pytest.mark.parametrize('aligned', (True, False))
+    @pytest.mark.parametrize('x_dtype', (torch.float, torch.half))
+    @pytest.mark.parametrize('rois_dtype', (torch.float, torch.half))
+    def test_autocast_cpu(self, aligned, x_dtype, rois_dtype):
+        with torch.cpu.amp.autocast():
+            self.test_forward(torch.device("cpu"), contiguous=False, aligned=aligned, x_dtype=x_dtype,
+                              rois_dtype=rois_dtype)
+
     def _make_rois(self, img_size, num_imgs, dtype, num_rois=1000):
         rois = torch.randint(0, img_size // 2, size=(num_rois, 5)).to(dtype)
         rois[:, 0] = torch.randint(0, num_imgs, size=(num_rois,))  # set batch index
@@ -514,13 +528,27 @@ def test_nms_cuda(self, iou, dtype=torch.float64):
         is_eq = torch.allclose(scores[r_cpu], scores[r_cuda.cpu()], rtol=tol, atol=tol)
         assert is_eq, err_msg.format(iou)
 
+
     @needs_cuda
     @pytest.mark.parametrize("iou", (.2, .5, .8))
     @pytest.mark.parametrize("dtype", (torch.float, torch.half))
     def test_autocast(self, iou, dtype):
         with torch.cuda.amp.autocast():
             self.test_nms_cuda(iou=iou, dtype=dtype)
 
+    @pytest.mark.parametrize("iou", (.2, .5, .8))
+    @pytest.mark.parametrize("dtype", (torch.bfloat16,))
+    def test_autocast_cpu(self, iou, dtype):
+        with torch.cpu.amp.autocast():
+            def test_nms_cpu(iou, dtype):
+                boxes, scores = self._create_tensors_with_iou(1000, iou)
+                boxes = boxes.to(dtype=dtype)
+                scores = scores.to(dtype=dtype)
+                out = ops.nms(boxes, scores, iou)
+                outf = ops.nms(boxes.float(), scores.float(), iou)
+                torch.testing.assert_close(out, outf)
+            test_nms_cpu(iou=iou, dtype=dtype)
+
     @needs_cuda
     def test_nms_cuda_float16(self):
         boxes = torch.tensor([[285.3538, 185.5758, 1193.5110, 851.4551],
524552 @needs_cuda
525553 def test_nms_cuda_float16 (self ):
526554 boxes = torch .tensor ([[285.3538 , 185.5758 , 1193.5110 , 851.4551 ],
@@ -774,6 +802,12 @@ def test_autocast(self, batch_sz, dtype):
         with torch.cuda.amp.autocast():
             self.test_forward(torch.device("cuda"), contiguous=False, batch_sz=batch_sz, dtype=dtype)
 
+    @pytest.mark.parametrize('batch_sz', (0, 33))
+    @pytest.mark.parametrize('dtype', (torch.float, torch.half))
+    def test_autocast_cpu(self, batch_sz, dtype):
+        with torch.cpu.amp.autocast():
+            self.test_forward(torch.device("cpu"), contiguous=False, batch_sz=batch_sz, dtype=dtype)
+
     def test_forward_scriptability(self):
         # Non-regression test for https://github.com/pytorch/vision/issues/4078
         torch.jit.script(ops.DeformConv2d(in_channels=8, out_channels=8, kernel_size=3))
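
The hunks above add CPU autocast coverage for the RoI ops, nms, and DeformConv2d forward paths. As a minimal standalone sketch of the pattern these tests exercise (illustrative box values, not from the test suite; assumes torchvision is installed), an op run under torch.cpu.amp.autocast() with low-precision inputs is checked against the plain float32 result:

import torch
import torchvision.ops as ops

# Two overlapping boxes in (x1, y1, x2, y2) format with illustrative scores.
boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0], [1.0, 1.0, 11.0, 11.0]])
scores = torch.tensor([0.9, 0.8])

# Under CPU autocast, bfloat16 inputs should yield the same kept indices
# as the plain float32 call.
with torch.cpu.amp.autocast():
    keep_amp = ops.nms(boxes.to(torch.bfloat16), scores.to(torch.bfloat16), iou_threshold=0.5)

keep_fp32 = ops.nms(boxes, scores, iou_threshold=0.5)
torch.testing.assert_close(keep_amp, keep_fp32)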