From d0d8271b3f4c8ca91b22e88451f618f86bd25688 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Fri, 10 May 2024 19:43:27 -0700 Subject: [PATCH 01/37] make skipXPU work --- torch/testing/_internal/common_device_type.py | 45 +++++++++++- .../_internal/common_methods_invocations.py | 11 ++- torch/testing/_internal/common_utils.py | 6 +- torch/testing/_internal/opinfo/core.py | 73 ++++++++++++++++++- 4 files changed, 128 insertions(+), 7 deletions(-) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index d5285a6d0d7fd..17bd8357f15b0 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -397,14 +397,19 @@ def instantiate_test_helper(cls, name, *, test, param_kwargs=None, decorator_fn= # Add the device param kwarg if the test needs device or devices. param_kwargs = {} if param_kwargs is None else param_kwargs test_sig_params = inspect.signature(test).parameters + #import pdb + #pdb.set_trace() if 'device' in test_sig_params or 'devices' in test_sig_params: device_arg: str = cls._init_and_get_primary_device() if hasattr(test, 'num_required_devices'): device_arg = cls.get_all_devices() _update_param_kwargs(param_kwargs, 'device', device_arg) - + #import pdb + #pdb.set_trace() # Apply decorators based on param kwargs. for decorator in decorator_fn(param_kwargs): + #import pdb + #pdb.set_trace() test = decorator(test) # Constructs the test @@ -437,6 +442,8 @@ def instantiated_test(self, param_kwargs=param_kwargs): return result assert not hasattr(cls, name), f"Redefinition of test {name}" + #import pdb + #pdb.set_trace() setattr(cls, name, instantiated_test) def default_parametrize_fn(test, generic_cls, device_cls): @@ -448,6 +455,8 @@ def default_parametrize_fn(test, generic_cls, device_cls): # If one of the @dtypes* decorators is present, also parametrize over the dtypes set by it. dtypes = cls._get_dtypes(test) + #import pdb + #pdb.set_trace() if dtypes is not None: def dtype_parametrize_fn(test, generic_cls, device_cls, dtypes=dtypes): @@ -473,6 +482,7 @@ def dtype_parametrize_fn(test, generic_cls, device_cls, dtypes=dtypes): dtype_kwarg = param_kwargs['dtypes'] if 'dtypes' in param_kwargs else param_kwargs['dtype'] test_name = f'{name}{test_suffix}{device_suffix}{_dtype_test_suffix(dtype_kwarg)}' + print(test_name) instantiate_test_helper(cls=cls, name=test_name, test=test, param_kwargs=param_kwargs, decorator_fn=decorator_fn) @@ -832,6 +842,7 @@ class OpDTypes(Enum): any_one = 4 # Test precisely one supported dtype none = 5 # Instantiate no dtype variants (no dtype kwarg needed) any_common_cpu_cuda_one = 6 # Test precisely one supported dtype that is common to both cuda and cpu + any_common_cpu_xpu_one = 7 # Test precisely one supported dtype that is common to both xpu and cpu # Arbitrary order @@ -909,6 +920,8 @@ def _parametrize_test(self, test, generic_cls, device_cls): 'instantiate_parametrized_tests()') op = check_exhausted_iterator = object() + #import pdb + #pdb.set_trace() for op in self.op_list: # Determine the set of dtypes to use. 
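A minimal standalone sketch of the dtype selection that the OpDTypes.any_common_cpu_xpu_one branch just below performs, mirroring the existing any_common_cpu_cuda_one case but intersecting with op.dtypesIfXPU (pick_common_cpu_xpu_dtype is a hypothetical helper written only for illustration; ANY_DTYPE_ORDER and the OpInfo-style op are the objects already used in this file):

    def pick_common_cpu_xpu_dtype(op, any_dtype_order):
        # dtypes the op claims to support on both CPU and XPU
        supported = set(op.dtypes) & set(op.dtypesIfXPU)
        # first common dtype in the canonical preference order, or None if
        # CPU and XPU share no supported dtype for this op
        return next((dt for dt in any_dtype_order if dt in supported), None)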
dtypes: Union[Set[torch.dtype], Set[None]] @@ -941,13 +954,19 @@ def _parametrize_test(self, test, generic_cls, device_cls): dtypes = {next(dtype for dtype in ANY_DTYPE_ORDER if dtype in supported)} else: dtypes = {} - + elif self.opinfo_dtypes == OpDTypes.any_common_cpu_xpu_one: + # Tries to pick a dtype that supports both CPU and CUDA + supported = set(op.dtypes).intersection(op.dtypesIfXPU) + if supported: + dtypes = {next(dtype for dtype in ANY_DTYPE_ORDER if dtype in supported)} + else: + dtypes = {} elif self.opinfo_dtypes == OpDTypes.none: dtypes = {None} else: raise RuntimeError(f"Unknown OpDType: {self.opinfo_dtypes}") - if self.allowed_dtypes is not None: + if self.allowed_dtypes is not None and dtypes is not None: dtypes = dtypes.intersection(self.allowed_dtypes) # Construct the test name; device / dtype parts are handled outside. @@ -992,6 +1011,7 @@ def test_wrapper(*args, **kwargs): decorator_fn = partial(op.get_decorators, generic_cls.__name__, test.__name__, device_cls.device_type, dtype) + #print("create test {} op={} dtype={} param_kwargs={} decorator_fn={}".format(test_name, op, dtype, param_kwargs, decorator_fn)) yield (test_wrapper, test_name, param_kwargs, decorator_fn) except Exception as ex: # Provides an error message for debugging before rethrowing the exception @@ -1041,6 +1061,11 @@ class skipCUDAIf(skipIf): def __init__(self, dep, reason): super().__init__(dep, reason, device_type='cuda') +class skipXPUIf(skipIf): + + def __init__(self, dep, reason): + super().__init__(dep, reason, device_type='xpu') + # Skips a test on Lazy if the condition is true. class skipLazyIf(skipIf): @@ -1356,6 +1381,17 @@ def only_fn(self, *args, **kwargs): return only_fn +def onlyCUDAAndXPU(fn): + @wraps(fn) + def only_fn(self, *args, **kwargs): + if self.device_type not in ('cuda', 'xpu'): + reason = f"onlyCUDAAndXPU: doesn't run on {self.device_type}" + raise unittest.SkipTest(reason) + + return fn(self, *args, **kwargs) + + return only_fn + def disablecuDNN(fn): @wraps(fn) @@ -1563,6 +1599,9 @@ def skipLazy(fn): def skipMeta(fn): return skipMetaIf(True, "test doesn't work with meta tensors")(fn) +def skipXPU(fn): + return skipXPUIf(True, "test doesn't work with XPU tensors")(fn) + def skipXLA(fn): return skipXLAIf(True, "Marked as skipped for XLA")(fn) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 001d93de1875f..8c2a72390aca9 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -10421,6 +10421,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ref=np.abs, dtypes=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), + dtypesIfXPU=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), + skipXPU=False, skips=( DecorateInfo(unittest.skip("In-place abs not supported for complex tensors"), 'TestBwdGradients', 'test_inplace_grad', dtypes=(torch.cdouble,)), @@ -10543,6 +10545,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, supports_forward_ad=True, supports_two_python_scalars=True, + skipXPU=False, decorators=( DecorateInfo( toleranceOverride({torch.chalf: tol(atol=1e-2, rtol=0)}), @@ -10572,6 +10575,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_autograd=False, error_inputs_func=error_inputs_item, 
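Ops are opted into the XPU sweep entry by entry: leaving skipXPU at its default of True keeps an op skipped on xpu, while setting skipXPU=False together with a dtypesIfXPU list enables it, as the abs, add, and item entries in these hunks do. A hypothetical entry following the same pattern, in the context of the op_db list in this file (the op name and dtype lists here are placeholders for illustration, not part of the patch):

    OpInfo(
        'example_op',  # placeholder name
        dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
        dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
        # dtypes this op is expected to work with on XPU
        dtypesIfXPU=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
        skipXPU=False,  # opt in to the generated *_xpu test variants
    ),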
sample_inputs_func=sample_inputs_item, + skipXPU=False, skips=( # Error testing item function variant DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', @@ -10584,7 +10588,9 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake_autocast'), # Booleans mismatch: AssertionError: False is not true DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake'), - )), + #DecorateInfo(unittest.skip, 'TestCommon', 'test_compare_cpu', device_type="xpu", dtypes=None), + ) + ), OpInfo('arange', dtypes=all_types_and(torch.bfloat16, torch.float16), supports_out=True, @@ -10592,6 +10598,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): is_factory_function=True, error_inputs_func=error_inputs_arange, sample_inputs_func=sample_inputs_arange, + skipXPU=False, skips=( # https://github.com/pytorch/pytorch/issues/81774 DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'), @@ -18012,6 +18019,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_forward_ad=True, supports_fwgrad_bwgrad=True, sample_inputs_func=sample_repeat_tile, + skipXPU=True, skips=( DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"), )), @@ -19198,6 +19206,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): result_dtype=torch.bool, dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), ref=reference_reduction_numpy(np.all), + skipXPU=False, skips=( # FIXME: uint8 input returns uint8 instead of bool DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_result_dtype', dtypes=[torch.uint8]), diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 46e4f817d2b9d..b56d44c3904bb 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -227,7 +227,7 @@ def maybe_load_json(filename): if os.getenv("DISABLED_TESTS_FILE", ""): disabled_tests_dict = maybe_load_json(os.getenv("DISABLED_TESTS_FILE", "")) -NATIVE_DEVICES = ('cpu', 'cuda', 'meta', torch._C._get_privateuse1_backend_name()) +NATIVE_DEVICES = ('cpu', 'cuda', 'meta', 'xpu', torch._C._get_privateuse1_backend_name()) check_names = ['orin', 'concord', 'galen', 'xavier', 'nano', 'jetson', 'tegra'] IS_JETSON = any(name in platform.platform() for name in check_names) @@ -389,6 +389,8 @@ def composite_fn(test, generic_cls, device_cls, old_parametrize_fn=old_parametrize_fn, new_parametrize_fn=new_parametrize_fn): old_tests = list(old_parametrize_fn(test, generic_cls, device_cls)) + import pdb + pdb.set_trace() for (old_test, old_test_name, old_param_kwargs, old_dec_fn) in old_tests: for (new_test, new_test_name, new_param_kwargs, new_dec_fn) in \ new_parametrize_fn(old_test, generic_cls, device_cls): @@ -403,6 +405,8 @@ def composite_fn(test, generic_cls, device_cls, old_test_name) def merged_decorator_fn(param_kwargs, old_dec_fn=old_dec_fn, new_dec_fn=new_dec_fn): + import pdb + pdb.set_trace() return list(old_dec_fn(param_kwargs)) + list(new_dec_fn(param_kwargs)) yield (new_test, merged_test_name, full_param_kwargs, merged_decorator_fn) diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index 70c643d2b8fee..02e430ed616c5 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -25,6 +25,7 @@ floating_and_complex_types, floating_and_complex_types_and, 
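One side effect worth noting: with 'xpu' added to NATIVE_DEVICES in common_utils.py above, tests guarded by onlyNativeDeviceTypes are no longer skipped for the xpu device class, since that decorator only checks membership in this tuple. A simplified sketch of that existing check (not new code in this patch):

    # inside the wrapper produced by onlyNativeDeviceTypes
    if self.device_type not in NATIVE_DEVICES:  # 'xpu' is now a member
        raise unittest.SkipTest(
            f"onlyNativeDeviceTypes: doesn't run on {self.device_type}")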
floating_types, + empty_types, ) from torch.testing._internal.common_utils import ( is_iterable_of_tensors, @@ -95,18 +96,26 @@ def __init__( self.dtypes = dtypes self.active_if = active_if + print("init decorators: {} {} {} {} {}".format(self.cls_name, self.test_name, self.device_type, self.dtypes, self.active_if)) + # Validate dtypes if self.dtypes is not None: for dtype in self.dtypes: assert isinstance(dtype, torch.dtype) def is_active(self, cls_name, test_name, device_type, dtype, param_kwargs): + print("is_active: {} {} {} {} {} {}".format(self.decorators, self.active_if, + (self.cls_name is None or self.cls_name == cls_name), + (self.test_name is None or self.test_name == test_name), + (self.device_type is None or self.device_type == device_type), + (self.dtypes is None or dtype in self.dtypes))) + print("is_active details: {} {} {} {}".format(self.cls_name, cls_name, self.test_name, test_name)) return ( self.active_if and (self.cls_name is None or self.cls_name == cls_name) and (self.test_name is None or self.test_name == test_name) and (self.device_type is None or self.device_type == device_type) - and (self.dtypes is None or dtype in self.dtypes) + and (self.dtypes is None or G in self.dtypes) # Support callables over kwargs to determine if the decorator is active. and ( self.active_if(param_kwargs) @@ -680,6 +689,9 @@ class OpInfo: # information about which tests to skip skips: Tuple = tuple() + # skip xpu by default + skipXPU: bool = True + # decorators to apply to generated tests decorators: Tuple = tuple() @@ -723,6 +735,9 @@ class OpInfo: # dtypes this function is expected to work with on CUDA dtypesIfCUDA: _dispatch_dtypes = None + # dtypes this function is expected to work with on XPU + dtypesIfXPU: _dispatch_dtypes = None + # dtypes this function is expected to work with on ROCM dtypesIfROCM: _dispatch_dtypes = None @@ -732,6 +747,9 @@ class OpInfo: # backward dtypes this function is expected to work with on CUDA backward_dtypesIfCUDA: _dispatch_dtypes = None + # backward dtypes this function is expected to work with on XPU + backward_dtypesIfXPU: _dispatch_dtypes = None + # backward dtypes this function is expected to work with on ROCM backward_dtypesIfROCM: _dispatch_dtypes = None @@ -945,6 +963,19 @@ def __post_init__(self): else self.dtypes ) ) + + self.backward_dtypesIfXPU = ( + set(self.backward_dtypesIfXPU) + if self.backward_dtypesIfXPU is not None + else ( + self.backward_dtypes + if self.backward_dtypes is not None + else self.dtypesIfXPU + if self.dtypesIfXPU is not None + else self.dtypes + ) + ) + self.backward_dtypes = ( set(self.backward_dtypes) if self.backward_dtypes is not None @@ -954,6 +985,11 @@ def __post_init__(self): self.dtypesIfCUDA = ( set(self.dtypesIfCUDA) if self.dtypesIfCUDA is not None else self.dtypes ) + + self.dtypesIfXPU = ( + set(self.dtypesIfXPU) if self.dtypesIfXPU is not None else self.dtypes + ) + self.dtypesIfROCM = ( set(self.dtypesIfROCM) if self.dtypesIfROCM is not None @@ -991,6 +1027,19 @@ def __post_init__(self): else: self.inplace_operator_variant = None + if self.skipXPU == True: + skip_dtypes= self.dtypesIfXPU + + if self.skips is not None: + #self.skips = (*self.skips, DecorateInfo(unittest.skip, 'TestCommon', 'test_compare_cpu', device_type="xpu", dtypes=skip_dtypes)) + self.skips = (*self.skips, DecorateInfo(unittest.skip, device_type="xpu", dtypes=None)) + else: + #self.skips = (DecorateInfo(unittest.skip, 'TestCommon', 'test_compare_cpu', device_type="xpu", dtypes=skip_dtypes)) + self.skips = 
(DecorateInfo(unittest.skip, device_type="xpu", dtypes=None)) + print("#### skipXPU on {} {} {}".format(self.name, skip_dtypes, self.skips)) + else: + print("#### Don't skipXPU on {}".format(self.name)) + self.decorators = (*self.decorators, *self.skips) # Specifying sample inputs function without specifying the @@ -1125,6 +1174,9 @@ def __post_init__(self): self.aliases = tuple(AliasInfo(a) for a in self.aliases) # type: ignore[assignment] else: self.aliases = () + + + def __call__(self, *args, **kwargs): """Calls the function variant of the operator.""" @@ -1329,6 +1381,8 @@ def sample_inputs_sparse_bsc(self, device, dtype, requires_grad=False, **kwargs) def get_decorators(self, test_class, test_name, device, dtype, param_kwargs): """Returns the decorators targeting the given test.""" result = [] + #import pdb + #pdb.set_trace() for decorator in self.decorators: if isinstance(decorator, DecorateInfo): if decorator.is_active( @@ -1345,6 +1399,9 @@ def supported_dtypes(self, device_type): device_type = torch.device(device_type).type if device_type == "cuda": return self.dtypesIfROCM if TEST_WITH_ROCM else self.dtypesIfCUDA + if device_type == "xpu": + return self.dtypesIfXPU + return self.dtypes def supported_backward_dtypes(self, device_type): @@ -1361,6 +1418,8 @@ def supported_backward_dtypes(self, device_type): if TEST_WITH_ROCM else self.backward_dtypesIfCUDA ) + elif device_type == "xpu": + backward_dtypes = self.backward_dtypesIfXPU else: backward_dtypes = self.backward_dtypes @@ -1515,6 +1574,7 @@ def __init__( yield tuple(), {}, ), + skipXPU: bool = True, # Options from the OpInfo base class **kwargs, ): @@ -1538,7 +1598,7 @@ def sample_inputs_func(*args, **kwargs): # Override OpInfo defaults and call base class __init__ kwargs.setdefault("inplace_variant", None) kwargs.setdefault("sample_inputs_func", sample_inputs_func) - super().__init__(name, promotes_int_to_float=promotes_int_to_float, **kwargs) + super().__init__(name, promotes_int_to_float=promotes_int_to_float, skipXPU = skipXPU, **kwargs) self.identity = identity self.nan_policy = nan_policy @@ -2103,6 +2163,7 @@ def __init__( supports_rhs_python_scalar=True, # Whether the operator allows Tensor x scalar inputs supports_one_python_scalar=False, # Whether the operator allows scalar x tensor and tensor x scalar inputs supports_two_python_scalars=False, # Whether the operator allows scalar x scalar inputs + skipXPU=True, **kwargs, ): self._original_binary_ufunc_args = locals().copy() @@ -2123,6 +2184,7 @@ def __init__( sample_inputs_func=sample_inputs_func, reference_inputs_func=reference_inputs_func, error_inputs_func=make_error_inputs_elementwise_binary(error_inputs_func), + skipXPU=skipXPU, **kwargs, ) @@ -2451,6 +2513,7 @@ def __init__( reference_inputs_func=reference_inputs_elementwise_unary, sample_kwargs=lambda device, dtype, input: ({}, {}), reference_numerics_filter=None, # Filters values in the range of the domain specified above but that should not be tested + skipXPU=True, **kwargs, ): self._original_unary_ufunc_args = locals().copy() @@ -2460,8 +2523,10 @@ def __init__( dtypes=dtypes, sample_inputs_func=sample_inputs_func, reference_inputs_func=reference_inputs_func, + skipXPU=skipXPU, **kwargs, ) + self.domain = domain self.handles_complex_extremal_values = handles_complex_extremal_values self.handles_large_floats = handles_large_floats @@ -2593,6 +2658,7 @@ def __init__( ndimensional: SpectralFuncType, sample_inputs_func=sample_inputs_spectral_ops, decorators=None, + skipXPU=True, **kwargs, ): 
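Putting the pieces above together: when an OpInfo leaves skipXPU=True, __post_init__ appends a DecorateInfo(unittest.skip, device_type="xpu") to its skips, those skips are folded into decorators, and get_decorators later matches that entry for any test instantiated on the xpu device class, so every generated *_xpu variant is skipped unless the op opts in with skipXPU=False. A condensed sketch of that flow (illustrative only; the real logic is in __post_init__ and get_decorators above):

    # at OpInfo construction time
    if op.skipXPU:
        op.skips = (*op.skips, DecorateInfo(unittest.skip, device_type="xpu"))
    op.decorators = (*op.decorators, *op.skips)

    # at test instantiation time, for a variant targeting the xpu device
    for decorator in op.get_decorators(cls_name, test_name, "xpu", dtype, param_kwargs):
        test = decorator(test)  # unittest.skip ends up wrapping the xpu variant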
self._original_spectral_func_args = dict(locals()).copy() @@ -2613,6 +2679,7 @@ def __init__( dtypes=dtypes, decorators=decorators, sample_inputs_func=sample_inputs_func, + skipXPU=skipXPU, **kwargs, ) self.ref = ref @@ -2631,6 +2698,7 @@ def __init__( dtypesIfCUDA=None, dtypesIfROCM=None, sample_inputs_func=None, + skipXPU=True, **kwargs, ): super().__init__( @@ -2639,6 +2707,7 @@ def __init__( dtypesIfCUDA=dtypesIfCUDA, dtypesIfROCM=dtypesIfROCM, sample_inputs_func=sample_inputs_func, + skipXPU=skipXPU, **kwargs, ) self.ref = ref From c791db9c4f7ae807f4d22bf9434df40aaf764428 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Sun, 12 May 2024 22:16:15 -0700 Subject: [PATCH 02/37] enabled torch-xpu ops in op_db --- test/test_ops.py | 1734 +++++++++-------- .../_internal/common_methods_invocations.py | 39 +- torch/testing/_internal/common_utils.py | 6 +- torch/testing/_internal/opinfo/core.py | 12 +- 4 files changed, 920 insertions(+), 871 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 44f503ae9b6ed..4b665336c1a50 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -32,10 +32,12 @@ instantiate_device_type_tests, onlyCPU, onlyCUDA, + onlyCUDAAndXPU, onlyNativeDeviceTypes, OpDTypes, ops, skipMeta, + skipXPU, ) from torch.testing._internal.common_dtype import ( all_types_and_complex_and, @@ -76,6 +78,7 @@ TEST_WITH_TORCHDYNAMO, TEST_WITH_TORCHINDUCTOR, TEST_WITH_UBSAN, + TEST_XPU, TestCase, unMarkDynamoStrictTest, ) @@ -105,6 +108,12 @@ ) ) +if TEST_XPU: + any_common_cpu_device_one = OpDTypes.any_common_cpu_xpu_one +else: + any_common_cpu_device_one = OpDTypes.any_common_cpu_cuda_one + + def reduction_dtype_filter(op): if ( @@ -127,6 +136,11 @@ def reduction_dtype_filter(op): aten = torch.ops.aten +_xpu_computation_op_list = ["_refs.abs", "_refs.all", "item", "abs", "add", "_refs.fill"] +_xpu_computation_op_list = ["abs"] +_xpu_computation_ops = [ + op for op in ops_and_refs if op.name in _xpu_computation_op_list +] # Tests that apply to all operators and aren't related to any particular # system @@ -153,9 +167,10 @@ def tearDownClass(cls): assert len(filtered_ops) == 0, err_msg # Validates that each OpInfo works correctly on different CUDA devices - @onlyCUDA + @onlyCUDAAndXPU @deviceCountAtLeast(2) @ops(op_db, allowed_dtypes=(torch.float32, torch.long)) + #@ops(_xpu_computation_ops, dtypes=any_common_cpu_device_one) def test_multiple_devices(self, devices, dtype, op): for cuda_device_str in devices: cuda_device = torch.device(cuda_device_str) @@ -271,7 +286,7 @@ def test_numpy_ref(self, device, dtype, op): and op.formatted_name in ("signal_windows_exponential", "signal_windows_bartlett") and dtype == torch.float64 - and "cuda" in device + and ("cuda" in device or "xpu" in device) ): # noqa: E121 raise unittest.SkipTest("XXX: raises tensor-likes are not close.") @@ -283,16 +298,19 @@ def test_numpy_ref(self, device, dtype, op): ) # Tests that the cpu and gpu results are consistent - @onlyCUDA + @onlyCUDAAndXPU @suppress_warnings @slowTest - @ops(_ops_and_refs_with_no_numpy_ref, dtypes=OpDTypes.any_common_cpu_cuda_one) + @ops(_ops_and_refs_with_no_numpy_ref, dtypes=any_common_cpu_device_one) + #@ops(_xpu_computation_ops, dtypes=any_common_cpu_device_one) def test_compare_cpu(self, device, dtype, op): def to_cpu(arg): if isinstance(arg, torch.Tensor): return arg.to(device="cpu") return arg + #import pdb + #pdb.set_trace() samples = op.reference_inputs(device, dtype) for sample in samples: @@ -540,7 +558,7 @@ def test_python_ref_torch_fallback(self, device, dtype, op): 
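The helpers introduced in patch 01 (onlyCUDAAndXPU, skipXPU, skipXPUIf) are used above in test_ops.py; outside this file they combine with the usual device-type test machinery in the same way. A small self-contained sketch (ExampleXPUTest is a hypothetical test class, not part of the patch):

    import torch
    from torch.testing._internal.common_device_type import (
        instantiate_device_type_tests,
        onlyCUDAAndXPU,
        skipXPUIf,
    )
    from torch.testing._internal.common_utils import run_tests, TestCase

    class ExampleXPUTest(TestCase):
        # runs only for the 'cuda' and 'xpu' device classes
        @onlyCUDAAndXPU
        def test_add_matches_cpu(self, device):
            x = torch.randn(4, device=device)
            self.assertEqual((x + x).cpu(), x.cpu() + x.cpu())

        # skipped on 'xpu' while the condition holds; other devices run it
        @skipXPUIf(True, "example: not yet supported on XPU")
        def test_not_ready_on_xpu(self, device):
            self.assertEqual(torch.ones(1, device=device).item(), 1.0)

    instantiate_device_type_tests(ExampleXPUTest, globals())

    if __name__ == "__main__":
        run_tests()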
self._ref_test_helper(contextlib.nullcontext, device, dtype, op) @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") - @onlyCUDA + @onlyCUDAAndXPU @ops(python_ref_db) @parametrize( "executor", @@ -590,6 +608,7 @@ def test_errors(self, device, op): out = op(si.input, *si.args, **si.kwargs) self.assertFalse(isinstance(out, type(NotImplemented))) + @skipXPU @skipMeta @onlyNativeDeviceTypes @ops( @@ -1002,6 +1021,9 @@ def _case_two_transform(t): wrong_device = "cpu" elif torch.cuda.is_available(): wrong_device = "cuda" + elif torch.xpu.is_available(): + # Daisy ???? + wrong_device = "xpu" factory_fn_msg = ( "\n\nNOTE: If your op is a factory function (i.e., it accepts TensorOptions) you should mark its " @@ -1818,857 +1840,859 @@ def check_cow_input( allow_list=op.allow_cow_input_materialize_backward, ) - @ops(op_db, allowed_dtypes=(torch.float,)) - def test_view_replay(self, device, dtype, op): - def _assert_match_metadata(a, b): - self.assertEqual(a.size(), b.size()) - self.assertEqual(a.stride(), b.stride()) - self.assertEqual(a.storage_offset(), b.storage_offset()) - self.assertEqual(a.device, b.device) - self.assertEqual(a.dtype, b.dtype) - - # ensure view replay is enabled - with torch.autograd._force_original_view_tracking(True): - for sample in op.sample_inputs(device, dtype, requires_grad=False): - inp = sample.input - outs = op(inp, *sample.args, **sample.kwargs) - if not isinstance(outs, (tuple, List)): - outs = [outs] - - # for all outputs that are views of the input, we should be able to replay the - # forward and reverse views via a functioning view_func() / rev_view_func(). - for out in outs: - if not ( - isinstance(out, torch.Tensor) - and out._is_view() - and out._base is inp - ): - continue - - # forward view_func - new_inp = inp.clone() - _assert_match_metadata(new_inp, inp) - new_out = out._view_func_unsafe(new_inp) - _assert_match_metadata(new_out, out) - self.assertEqual(new_out, out) - - # reverse view_func - new_out = out.detach() - new_inp = out._rev_view_func_unsafe(new_out) - _assert_match_metadata(new_inp, inp) - self.assertTrue(new_inp._is_view()) - self.assertTrue(new_inp._base is new_out) - - -@unMarkDynamoStrictTest -class TestMathBits(TestCase): - # Tests that - # 1. The operator's output for physically conjugated/negated tensors and conjugate/negative view tensors - # produces the same value - # 2. The gradients are same in both cases mentioned in (1) - # 3. If the operator's inplace variant is supported, tests that the inplace operation - # produces the correct value when called on a conjugate/negative view tensor and that the output - # has its conj/neg bit set to true - # This test only runs for C -> R and C -> C functions - # TODO: add tests for `R->C` functions - # Note: This test runs for functions that take both tensors and tensorlists as input. - def _test_math_view( - self, - device, - dtype, - op, - samples, - math_op_physical, - math_op_view, - is_bit_set, - out_type, - ): - inplace_variant = op.inplace_variant - - # helper function to clone and conjugate/negate the input if its a tensor - # else clone the sequence and conjugate/negate the first element in the sequence - # If a requires_grad argument is provided the tensor being conjugated/negated will - # have its requires_grad set to that value. 
- def clone_and_perform_view(input, **kwargs): - if isinstance(input, torch.Tensor): - requires_grad = kwargs.get("requires_grad", input.requires_grad) - with torch.no_grad(): - # Ensure view represents the original sample input - input = math_op_physical(input) - # Note: .conj() is not called under no_grad mode since it's not allowed to modify a - # view created in no_grad mode. Here it's ok to do so, so as a workaround we call conj - # before resetting the requires_grad field for input - input = math_op_view(input) - assert input.is_leaf - return input.requires_grad_(requires_grad) - - if isinstance(input, Sequence): - out = list(map(clone_input_helper, input)) - out[0] = clone_and_perform_view(out[0]) - return tuple(out) - - for sample in samples: - tensor = ( - sample.input - if isinstance(sample.input, torch.Tensor) - else sample.input[0] - ) - cloned1 = clone_and_perform_view(sample.input) - - # Computes function forward value with a physically conjugated/negated tensor and - # a conj/neg view tensor and verifies that the output in both case are equal. - expected_forward = op(sample.input, *sample.args, **sample.kwargs) - forward_with_mathview = op(cloned1, *sample.args, **sample.kwargs) - self.assertEqual(expected_forward, forward_with_mathview) - - # If the op has an inplace variant, and the input doesn't require broadcasting - # and has the same dtype as output, verify that the inplace operation on a conjugated/negated - # input produces correct output, and the output tensor has the conj/neg bit set to True - if inplace_variant is not None and not sample.broadcasts_input: - cloned2 = clone_and_perform_view(tensor, requires_grad=False) - if ( - isinstance(expected_forward, torch.Tensor) - and expected_forward.dtype is tensor.dtype - ): - inplace_forward = inplace_variant( - cloned2, *sample.args, **sample.kwargs - ) - self.assertTrue(is_bit_set(inplace_forward)) - self.assertEqual(inplace_forward, expected_forward) - - # TODO: backward consistency only supported for single tensor outputs - # TODO: backward consistency only checked on sample.input, not all - # tensor inputs - # TODO: update to handle checking grads of all tensor inputs as - # derived from each tensor output - if ( - isinstance(expected_forward, torch.Tensor) - and expected_forward.requires_grad - ): - output_process_fn_grad = sample.output_process_fn_grad or (lambda x: x) - expected_forward = output_process_fn_grad(expected_forward) - forward_with_mathview = output_process_fn_grad(forward_with_mathview) - - tensor = ( - sample.input - if isinstance(sample.input, torch.Tensor) - else sample.input[0] - ) - expected_forward.sum().abs().backward(retain_graph=True) - forward_with_mathview.sum().abs().backward(retain_graph=True) - if tensor.grad is not None: - cloned1_tensor = ( - cloned1 if isinstance(cloned1, torch.Tensor) else cloned1[0] - ) - self.assertEqual(tensor.grad, cloned1_tensor.grad) - - tensor.grad, cloned1_tensor.grad = None, None - - # a repeat of the above test if output is not complex valued - if out_type(expected_forward): - grad = torch.randn_like(expected_forward) - expected_forward.backward(grad) - forward_with_mathview.backward( - math_op_view(math_op_physical(grad)) - ) - - self.assertEqual(tensor.grad, cloned1_tensor.grad) - - @ops(ops_and_refs, allowed_dtypes=(torch.cfloat,)) - def test_conj_view(self, device, dtype, op): - if not op.test_conjugated_samples: - self.skipTest("Operation doesn't support conjugated inputs.") - math_op_physical = torch.conj_physical - math_op_view = torch.conj - 
_requires_grad = torch.cfloat in op.supported_backward_dtypes( - torch.device(device).type - ) - is_bit_set = torch.is_conj - samples = op.sample_inputs(device, dtype, requires_grad=_requires_grad) - self._test_math_view( - device, - dtype, - op, - samples, - math_op_physical, - math_op_view, - is_bit_set, - torch.is_complex, - ) - - @ops(ops_and_refs, allowed_dtypes=(torch.double,)) - def test_neg_view(self, device, dtype, op): - if not op.test_neg_view: - self.skipTest("Operation not tested with tensors with negative bit.") - math_op_physical = torch.neg - math_op_view = torch._neg_view - is_bit_set = torch.is_neg - samples = op.sample_inputs(device, dtype, requires_grad=op.supports_autograd) - self._test_math_view( - device, - dtype, - op, - samples, - math_op_physical, - math_op_view, - is_bit_set, - lambda x: True, - ) - - @ops(ops_and_refs, allowed_dtypes=(torch.cdouble,)) - def test_neg_conj_view(self, device, dtype, op): - if not op.test_neg_view: - self.skipTest("Operation not tested with tensors with negative bit.") - if not op.test_conjugated_samples: - self.skipTest("Operation doesn't support conjugated inputs.") - - def math_op_physical(x): - return -x.conj_physical() - - def math_op_view(x): - return torch._neg_view(x).conj() - - def is_bit_set(x): - return torch.is_neg(x) and torch.is_conj(x) - - _requires_grad = dtype in op.supported_backward_dtypes( - torch.device(device).type - ) - samples = op.sample_inputs(device, dtype, requires_grad=_requires_grad) - # Only test one sample - samples = itertools.islice(samples, 1) - self._test_math_view( - device, - dtype, - op, - samples, - math_op_physical, - math_op_view, - is_bit_set, - torch.is_complex, - ) - - -# input strides and size may have been altered due to the result of an inplace op -def check_inplace_view(func, input, rs, input_size, input_strides): - if func is None: - return - # TODO: extend this test to test ops with multiple outputs and ops like native_batch_norm(_legit).out - # which mutate not necessarily the first input. - if isinstance(rs, torch.Tensor) and rs is input: - unequal_size = rs.size() != input_size - unequal_strides = rs.stride() != input_strides - # resize_ should probably have inplace_view tag. 
Not adding the tag since it - # breaks some codegen logic - if unequal_size or unequal_strides: - if isinstance(func, torch._ops.OpOverloadPacket): - func = func.default - # Reference: https://github.com/pytorch/pytorch/issues/78759 - if func is not torch.ops.aten.resize_.default: - # TODO: use self.assertIn when we have separate tests for each tag - assert torch.Tag.inplace_view in func.tags - - -# A mode that when enabled runs correctness checks to ensure -# that operators have expected tags based on their input and -# output tensor properties -class TestTagsMode(TorchDispatchMode): - def __torch_dispatch__(self, func, types, args=(), kwargs=None): - if isinstance(args[0], torch.Tensor): - old_size = args[0].size() - old_stride = args[0].stride() - rs = func(*args, **kwargs) - check_inplace_view(func, args[0], rs, old_size, old_stride) - else: - rs = func(*args, **kwargs) - return rs - - -# Test to verify the correctness for tags in `tags.yaml`, also available for access through `torch.Tags` -@unMarkDynamoStrictTest -class TestTags(TestCase): - @onlyCPU - @ops(ops_and_refs, dtypes=OpDTypes.any_one) - def test_tags(self, device, dtype, op): - samples = op.sample_inputs(device, dtype, requires_grad=False) - for sample in samples: - # TODO: Test tags for ops that return a list of tensors - input = sample.input - if isinstance(input, torch.Tensor): - old_size = input.size() - old_stride = input.stride() - with TestTagsMode(): - rs = op(input, *sample.args, **sample.kwargs) - # TODO: add test for aliases: https://github.com/pytorch/pytorch/issues/78761 - aten_name = op.aten_name if op.aten_name is not None else op.name - opoverloadpacket = getattr(torch.ops.aten, aten_name, None) - check_inplace_view(opoverloadpacket, input, rs, old_size, old_stride) - - -class TestSelfKwarg(TestCase): - def test_self_kwargs(self): - """Verify that we can call the aten ops with all kwargs even if the - argument's name is "self" - """ - torch.ops.aten.reshape.default(self=torch.rand(1, 2), shape=[2]) - torch.ops.aten.min.default(self=torch.rand(100)) - - -@unMarkDynamoStrictTest -class TestRefsOpsInfo(TestCase): - import_paths = [ - "_refs", - "_refs.special", - "_refs.nn.functional", - "_refs.fft", - "_refs._conversions", - ] - module_alls = [ - (path, import_module(f"torch.{path}").__all__) for path in import_paths - ] - ref_ops_names = tuple( - itertools.chain.from_iterable( - [f"{path}.{op}" for op in module_all] for path, module_all in module_alls - ) - ) - ref_db_names = {ref_op.name for ref_op in python_ref_db} - - # TODO: References that do not have an entry in python_ref_db - skip_ref_ops = { - "_refs.alias", - "_refs.bitwise_right_shift", - "_refs.copy_to", - "_refs.empty_permuted", - "_refs.empty_strided", - "_refs.equal", - "_refs.full", - "_refs.full_like", - "_refs.is_complex", - "_refs.to", - "_refs.mvlgamma", - "_refs.ones", - "_refs.ones_like", - "_refs.special.expit", - "_refs.std_var", - "_refs.swap_axes", - "_refs.uniform", - "_refs.scalar_tensor", - "_refs.trunc_divide", - "_refs.zero", - "_refs.zeros", - "_refs.zeros_like", - "_refs.rfloordiv", - "_refs.rtruediv", - "_refs.rpow", - # These should be tested with their out-of-place counterparts - "_refs.index_add_", - "_refs.index_copy_", - "_refs.index_fill_", - "_refs.native_group_norm", - } - - not_in_decomp_table = { - # duplicated in _decomp and _refs - "_refs.nn.functional.group_norm", - "_refs.nn.functional.mse_loss", - "_refs.floor_divide", - # duplicated as refs do not have decent support for advanced indexing - "_refs.index_copy", 
- "_refs.index_copy_", - "_refs.index_add", - "_refs.index_add_", - # these are not aten ops? - "_refs._conversions.bfloat16", - "_refs._conversions.bool", - "_refs._conversions.byte", - "_refs._conversions.char", - "_refs._conversions.double", - "_refs._conversions.float", - "_refs._conversions.half", - "_refs._conversions.int", - "_refs._conversions.long", - "_refs._conversions.short", - "_refs._conversions.chalf", - "_refs._conversions.cfloat", - "_refs._conversions.cdouble", - "_refs.broadcast_shapes", - "_refs.broadcast_tensors", - "_refs.mvlgamma", - "_refs.nn.functional.layer_norm", - "_refs.nn.functional.tanhshrink", - "_refs.nn.functional.triplet_margin_loss", - "_refs.rfloordiv", - "_refs.rtruediv", - "_refs.rpow", - # CompositeImplicitAutograd - "_refs.allclose", - "_refs.atleast_1d", - "_refs.atleast_2d", - "_refs.atleast_3d", - "_refs.broadcast_to", - "_refs.chunk", - "_refs.column_stack", - "_refs.contiguous", - "_refs.dsplit", - "_refs.dstack", - "_refs.fill", - "_refs.fill_", - "_refs.flatten", - "_refs.fliplr", - "_refs.flipud", - "_refs.float_power", - "_refs.hsplit", - "_refs.hstack", - "_refs.isclose", - "_refs.isfinite", - "_refs.isreal", - "_refs.istft", - "_refs.log_softmax", - "_refs.movedim", - "_refs.narrow", - "_refs.nn.functional.dropout", - "_refs.nn.functional.l1_loss", - "_refs.nn.functional.smooth_l1_loss", - "_refs.nn.functional.log_softmax", - "_refs.nn.functional.poisson_nll_loss", - "_refs.nn.functional.softmax", - "_refs.nn.functional.softmin", - "_refs.positive", - "_refs.ravel", - "_refs.reshape", - "_refs.softmax", - "_refs.special.expit", - "_refs.special.log_softmax", - "_refs.special.softmax", - "_refs.square", - "_refs.stft", - "_refs.T", - "_refs.take_along_dim", - "_refs.tensor_split", - "_refs.to", - "_refs.true_divide", - "_refs.trunc_divide", - "_refs.vsplit", - "_refs.vstack", - "_refs.linalg.matrix_norm", - "_refs.linalg.norm", - "_refs.linalg.svd", - "_refs.linalg.svdvals", - "_refs.unflatten", - "_refs.sum_to_size", - # ref implementation missing kwargs - "_refs.full_like", # missing "layout" - "_refs.scalar_tensor", # missing "layout" - # other - "_refs.block_diag", # only refs._block_diag_iterable is in decomposition table - "_refs.empty", # intentional; direct empty is faster and has less guards - "_refs.empty_permuted", # intentional; direct empty is faster and has less guards - "_refs.expand_as", - "_refs.as_strided", # _prims._as_strided_meta: "reduce() of empty sequence with no initial value" - "_refs.copy_to", # torch._C._jit_get_operation: No such operator aten::copy_to - "_refs.equal", # 'bool' object has no attribute 'dtype' - "_refs.conj", # Calls _prims.conj - "_refs.real", - "_refs.imag", - "_refs.reshape_as", - "_refs.view_as", - "_refs.view_as_complex", # TorchInductor does not support complex at the moment. 
- # the decompositions for these ops are slightly different - # because of out handling - "_refs.var_mean", - "_refs.std_mean", - "_refs.native_layer_norm", - } - - @parametrize("op", ref_ops_names) - def test_refs_are_in_python_ref_db(self, op): - inplace = op[-1] == "_" - if op in self.skip_ref_ops: - raise unittest.SkipTest(f"{op} does not have an entry in python_ref_db") - elif inplace: - self.assertNotIn( - op, - self.ref_db_names, - msg=f"{op} is an in-place operation and should not have an OpInfo", - ) - else: - # Intentionally don't use assertIn to avoid printing the - # (very large) container - self.assertTrue(op in self.ref_db_names, msg=f"{op} not in ref_db_names") - - @parametrize("op", ref_ops_names) - def test_refs_are_in_decomp_table(self, op): - path = op.split(".") - module_path = ".".join(path[:-1]) - op_name = path[-1] - op_impl = getattr(import_module(f"torch.{module_path}"), op_name) - - if op in self.not_in_decomp_table: - self.assertNotIn( - op_impl, - torch._decomp.decomposition_table.values(), - f"Unexpectedly found {op} in torch._decomp.decomposition_table.values()", - ) - else: - self.assertIn( - op_impl, - torch._decomp.decomposition_table.values(), - f"Did not find {op} in torch._decomp.decomposition_table.values()", - ) - - -fake_skips = ( - "aminmax", # failing input - "cov", # aweights cannot be negtaive - "istft", # window overlap add min: 0 - "linalg.eigvals", # The tensor has a non-zero number of elements, but its data is not allocated yet - "linalg.eigvalsh", # aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend - "linalg.matrix_power", # Could not run 'aten::eye.m_out' with arguments from the 'Meta' backend - # "linalg.pinv", # Could not run 'aten::pinv.out' with arguments from the 'Meta' backen - "linalg.matrix_rank.hermitian", # Could not run 'aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend - "linalg.pinv.hermitian", # tensor.mH is only supported on matrices or batches of matrices. Got 1-D tensor - "linalg.solve", # Could not run 'aten::linalg_solve' with arguments from the 'Meta' backend - "linalg.tensorsolve", # Could not run 'aten::linalg_solve' with arguments from the 'Meta' - "lu_solve", # MALLOC ERROR: debug - "multinomial", # Could not run 'aten::multinomial' with arguments from the 'Meta' backend - "mvlgamma.mvlgamma_p_1", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend - "mvlgamma.mvlgamma_p_3", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend - "mvlgamma.mvlgamma_p_5", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend - "nanmean", # logical_not() got an unexpected keyword argument 'out' - "quantile", # quantile() q values must be in the range [0, 1] - "nanquantile", # quantile() q values must be in the range [0, 1] - "nn.functional.ctc_loss", # The tensor has a non-zero number of elements, but its data is not allocated yet - "nn.functional.embedding_bag", # sometimes errors - "nn.functional.nll_loss", # sometimes errors - "nn.functional.max_pool1d", # The tensor has a non-zero number of elements - "to_sparse", # Could not run 'aten::_to_sparse' with arguments from the 'Meta' backend - "tensor_split", # The tensor has a non-zero number of elements, but its data is not allocated yet - "repeat_interleave", # cannot repeat_interleave a meta tensor without output_size - "sparse.sampled.addmm", # sparsity not supported - # Can not infer total number of classes from meta. 
no way at present to throw DynamicOutputShapeException - "nn.functional.one_hot", - "narrow", # Fails only for one overload with DataDependentOutputException (hence skip). -) - -fake_autocast_device_skips = defaultdict(dict) - -# TODO: investigate/fix -fake_autocast_device_skips["cpu"] = {"linalg.pinv"} - - -dynamic_output_op_tests = ( - "argwhere", - "bincount", - "combinations", - "linalg.lstsq", - "masked_select", - "nonzero", - "unique_consecutive", - "unique", - "linalg.lstsq.grad_oriented", -) - -# Ops that have dynamic output shapes that we can handle when -# allow_dynamic_shape_ops is True in fake tensor shape environment. -supported_dynamic_output_op_tests = ( - "nonzero", - "unique", - "repeat_interleave", - "masked_select", -) - -# some inputs invoke dynamic output shape operators, some do not -sometimes_dynamic_output_op_test = ( - "__getitem__", - "index_select", -) - -data_dependent_op_tests = ( - "equal", - "corrcoef", - "nn.functional.gaussian_nll_loss", - "allclose", -) - -aliasing_failures = ("histogramdd",) - -fake_backward_skips = { - "linalg.cond", - "linalg.matrix_norm", - "linalg.norm", - "linalg.svd", - "linalg.svdvals", - "pca_lowrank", - "roll", - "svd_lowrank", - "sgn", -} - -fake_backward_xfails = {skip(s) for s in fake_backward_skips} | { - xfail("fft.ihfftn"), # Mismatch in aten._conj_physical.default - xfail("fft.ihfft2"), # Mismatch in aten._conj_physical.default - skip("nn.functional.ctc_loss"), -} - -fake_autocast_backward_xfails = { - skip("nn.functional.binary_cross_entropy"), - skip("sparse.sampled_addmm"), - skip("linalg.pinv"), - skip("linalg.pinv", "hermitian"), - skip("linalg.pinv", "singular"), - skip("pinverse"), -} - - -@unMarkDynamoStrictTest -class TestFakeTensor(TestCase): - def setUp(self): - # Turn on FakeTensor caching and cross-checking for these tests: - cache_enabled = unittest.mock.patch( - "torch._dynamo.config.fake_tensor_cache_enabled", True - ) - cache_enabled.start() - self.addCleanup(cache_enabled.stop) - - cache_crosscheck = unittest.mock.patch( - "torch._dynamo.config.fake_tensor_cache_crosscheck_enabled", True - ) - cache_crosscheck.start() - self.addCleanup(cache_crosscheck.stop) - - def _test_fake_helper(self, device, dtype, op, context): - name = op.name - if op.variant_test_name: - name += "." 
+ op.variant_test_name - if name in fake_skips or "sparse" in name or "jiterator" in name: - self.skipTest("Skip failing test") - - samples = op.sample_inputs(device, dtype, requires_grad=False) - for sample in samples: - mode = FakeTensorMode() - - from torch.fx.experimental.symbolic_shapes import ShapeEnv - - allow_dynamic_output_shape_shape_env = ShapeEnv( - allow_dynamic_output_shape_ops=True - ) - - allow_dynamic_output_shape_mode = FakeTensorMode( - shape_env=allow_dynamic_output_shape_shape_env - ) - - try: - with context(): - res = op(sample.input, *sample.args, **sample.kwargs) - except Exception: - continue - - def run_with_fake_mode_and_verify(fake_mode, match_results=True): - def map_to_fake(e): - if isinstance(e, torch.Tensor): - return fake_mode.from_tensor(e) - else: - return e - - input = tree_map(map_to_fake, sample.input) - args = tree_map(map_to_fake, sample.args) - kwargs = tree_map(map_to_fake, sample.kwargs) - - try: - with context(): - with fake_mode: - res_fake = op(input, *args, **kwargs) - - if not match_results: - return - - for fake_out, real_out in zip( - pytree.tree_leaves(res_fake), pytree.tree_leaves(res) - ): - if not isinstance(fake_out, torch.Tensor): - self.assertTrue(not isinstance(real_out, torch.Tensor)) - self.assertEqual(fake_out, real_out) - continue - - self.assertTrue(isinstance(fake_out, FakeTensor)) - # if you see a shape exception here, you may need to add - # a `dynamic_output_shape` tag to an operator - - # prims/decomps must correctly model strides, - # see https://github.com/pytorch/pytorch/issues/78050#issuecomment-1253950325 - prims.utils.compare_tensor_meta(fake_out, real_out, True) - - if name not in aliasing_failures: - fake_aliasing = outputs_alias_inputs( - (input, args, kwargs), res_fake - ) - real_aliasing = outputs_alias_inputs( - (sample.input, sample, args, sample.kwargs), res - ) - self.assertEqual(fake_aliasing, real_aliasing) - - self.assertTrue( - name not in dynamic_output_op_tests - and name not in data_dependent_op_tests - ) - - except torch._subclasses.fake_tensor.UnsupportedFakeTensorException: - pass - except torch._subclasses.fake_tensor.UnsupportedOperatorException: - pass - except torch._subclasses.fake_tensor.DynamicOutputShapeException: - self.assertTrue( - name in dynamic_output_op_tests - or name in sometimes_dynamic_output_op_test - ) - self.assertTrue( - mode.shape_env is None - or not mode.shape_env.allow_dynamic_output_shape_ops - or name not in supported_dynamic_output_op_tests - ) - except torch._subclasses.fake_tensor.DataDependentOutputException: - self.assertTrue(name in data_dependent_op_tests) - - run_with_fake_mode_and_verify(mode) - if name in supported_dynamic_output_op_tests: - run_with_fake_mode_and_verify( - allow_dynamic_output_shape_mode, match_results=False - ) - - @ops(op_db, dtypes=OpDTypes.any_one) - def test_pointwise_ops(self, device, dtype, op): - name = op.name - if op.variant_test_name: - name += "." 
+ op.variant_test_name - if name in fake_skips or "sparse" in name or "jiterator" in name: - self.skipTest("Skip failing test") - - test_self = self - - class TestPointwiseMode(TorchDispatchMode): - def __torch_dispatch__(self, func, types, args=(), kwargs=None): - kwargs = kwargs or {} - - out = func(*args, **kwargs) - - if torch.Tag.pointwise in func.tags: - shapes = [] - for inp in pytree.arg_tree_leaves(*args, **kwargs): - if isinstance(inp, torch.Tensor): - shapes.append(inp.shape) - - out_shape = torch._refs._broadcast_shapes(*shapes) - - for out_elem in pytree.tree_leaves(out): - if isinstance(out_elem, torch.Tensor): - test_self.assertEqual(out_elem.shape, out_shape) - - return out - - samples = op.sample_inputs(device, dtype, requires_grad=False) - for sample in samples: - mode = FakeTensorMode() - - def map_to_fake(e): - if isinstance(e, torch.Tensor): - return mode.from_tensor(e) - else: - return e - - input = tree_map(map_to_fake, sample.input) - args = tree_map(map_to_fake, sample.args) - kwargs = tree_map(map_to_fake, sample.kwargs) - - try: - op(input, *args, **kwargs) - except Exception as e: - continue - - with TestPointwiseMode(): - with mode: - op(input, *args, **kwargs) - - @ops(op_db, dtypes=OpDTypes.any_one) - def test_fake(self, device, dtype, op): - self._test_fake_helper(device, dtype, op, contextlib.nullcontext) - - @ops(op_db, dtypes=OpDTypes.any_one) - def test_fake_autocast(self, device, dtype, op): - if op.name in fake_autocast_device_skips[device]: - self.skipTest("Skip failing test") - context = ( - torch.cuda.amp.autocast if device == "cuda" else torch.cpu.amp.autocast - ) - self._test_fake_helper(device, dtype, op, context) - - def _test_fake_crossref_helper(self, device, dtype, op, context): - samples = op.sample_inputs(device, dtype, requires_grad=True) - - for iter, sample in enumerate(samples): - args = [sample.input] + list(sample.args) - kwargs = sample.kwargs - - # skip these to speed up tests - common_skip_ops = ( - aten.detach.default, - aten.empty_strided.default, - aten.copy_.default, - aten.is_same_size.default, - ) - - # TODO: enable check_aliasing, batch norm fails - try: - with torch._subclasses.CrossRefFakeMode( - ignore_op_fn=lambda fn: fn in common_skip_ops, check_aliasing=True - ): - with warnings.catch_warnings(), context(), torch.autograd.set_multithreading_enabled( - False - ): - composite_compliance.compute_expected_grads( - op.get_op(), - args, - kwargs, - sample.output_process_fn_grad, - op.gradcheck_wrapper, - ) - except torch._subclasses.fake_tensor.UnsupportedOperatorException: - pass - - @onlyCUDA - @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,)) - @skipOps( - "TestFakeTensor", "test_fake_crossref_backward_no_amp", fake_backward_xfails - ) - def test_fake_crossref_backward_no_amp(self, device, dtype, op): - self._test_fake_crossref_helper(device, dtype, op, contextlib.nullcontext) - - @onlyCUDA - @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,)) - @skipOps( - "TestFakeTensor", - "test_fake_crossref_backward_amp", - fake_backward_xfails | fake_autocast_backward_xfails, - ) - def test_fake_crossref_backward_amp(self, device, dtype, op): - self._test_fake_crossref_helper(device, dtype, op, torch.cuda.amp.autocast) - - @ops([op for op in ops_and_refs if op.is_factory_function]) - def test_strided_layout(self, device, dtype, op): - samples = op.sample_inputs(device, dtype) - for sample in samples: - kwargs = sample.kwargs.copy() - kwargs["layout"] = torch.strided - 
strided_result = op(sample.input, *sample.args, **kwargs) - self.assertEqual(strided_result.layout, torch.strided) - - -instantiate_device_type_tests(TestCommon, globals()) -instantiate_device_type_tests(TestCompositeCompliance, globals()) -instantiate_device_type_tests(TestMathBits, globals()) -instantiate_device_type_tests(TestRefsOpsInfo, globals(), only_for="cpu") -instantiate_device_type_tests(TestFakeTensor, globals()) -instantiate_device_type_tests(TestTags, globals()) +# @ops(op_db, allowed_dtypes=(torch.float,)) +# def test_view_replay(self, device, dtype, op): +# def _assert_match_metadata(a, b): +# self.assertEqual(a.size(), b.size()) +# self.assertEqual(a.stride(), b.stride()) +# self.assertEqual(a.storage_offset(), b.storage_offset()) +# self.assertEqual(a.device, b.device) +# self.assertEqual(a.dtype, b.dtype) + +# # ensure view replay is enabled +# with torch.autograd._force_original_view_tracking(True): +# for sample in op.sample_inputs(device, dtype, requires_grad=False): +# inp = sample.input +# outs = op(inp, *sample.args, **sample.kwargs) +# if not isinstance(outs, (tuple, List)): +# outs = [outs] + +# # for all outputs that are views of the input, we should be able to replay the +# # forward and reverse views via a functioning view_func() / rev_view_func(). +# for out in outs: +# if not ( +# isinstance(out, torch.Tensor) +# and out._is_view() +# and out._base is inp +# ): +# continue + +# # forward view_func +# new_inp = inp.clone() +# _assert_match_metadata(new_inp, inp) +# new_out = out._view_func_unsafe(new_inp) +# _assert_match_metadata(new_out, out) +# self.assertEqual(new_out, out) + +# # reverse view_func +# new_out = out.detach() +# new_inp = out._rev_view_func_unsafe(new_out) +# _assert_match_metadata(new_inp, inp) +# self.assertTrue(new_inp._is_view()) +# self.assertTrue(new_inp._base is new_out) + + +# @unMarkDynamoStrictTest +# class TestMathBits(TestCase): +# # Tests that +# # 1. The operator's output for physically conjugated/negated tensors and conjugate/negative view tensors +# # produces the same value +# # 2. The gradients are same in both cases mentioned in (1) +# # 3. If the operator's inplace variant is supported, tests that the inplace operation +# # produces the correct value when called on a conjugate/negative view tensor and that the output +# # has its conj/neg bit set to true +# # This test only runs for C -> R and C -> C functions +# # TODO: add tests for `R->C` functions +# # Note: This test runs for functions that take both tensors and tensorlists as input. +# def _test_math_view( +# self, +# device, +# dtype, +# op, +# samples, +# math_op_physical, +# math_op_view, +# is_bit_set, +# out_type, +# ): +# inplace_variant = op.inplace_variant + +# # helper function to clone and conjugate/negate the input if its a tensor +# # else clone the sequence and conjugate/negate the first element in the sequence +# # If a requires_grad argument is provided the tensor being conjugated/negated will +# # have its requires_grad set to that value. +# def clone_and_perform_view(input, **kwargs): +# if isinstance(input, torch.Tensor): +# requires_grad = kwargs.get("requires_grad", input.requires_grad) +# with torch.no_grad(): +# # Ensure view represents the original sample input +# input = math_op_physical(input) +# # Note: .conj() is not called under no_grad mode since it's not allowed to modify a +# # view created in no_grad mode. 
Here it's ok to do so, so as a workaround we call conj +# # before resetting the requires_grad field for input +# input = math_op_view(input) +# assert input.is_leaf +# return input.requires_grad_(requires_grad) + +# if isinstance(input, Sequence): +# out = list(map(clone_input_helper, input)) +# out[0] = clone_and_perform_view(out[0]) +# return tuple(out) + +# for sample in samples: +# tensor = ( +# sample.input +# if isinstance(sample.input, torch.Tensor) +# else sample.input[0] +# ) +# cloned1 = clone_and_perform_view(sample.input) + +# # Computes function forward value with a physically conjugated/negated tensor and +# # a conj/neg view tensor and verifies that the output in both case are equal. +# expected_forward = op(sample.input, *sample.args, **sample.kwargs) +# forward_with_mathview = op(cloned1, *sample.args, **sample.kwargs) +# self.assertEqual(expected_forward, forward_with_mathview) + +# # If the op has an inplace variant, and the input doesn't require broadcasting +# # and has the same dtype as output, verify that the inplace operation on a conjugated/negated +# # input produces correct output, and the output tensor has the conj/neg bit set to True +# if inplace_variant is not None and not sample.broadcasts_input: +# cloned2 = clone_and_perform_view(tensor, requires_grad=False) +# if ( +# isinstance(expected_forward, torch.Tensor) +# and expected_forward.dtype is tensor.dtype +# ): +# inplace_forward = inplace_variant( +# cloned2, *sample.args, **sample.kwargs +# ) +# self.assertTrue(is_bit_set(inplace_forward)) +# self.assertEqual(inplace_forward, expected_forward) + +# # TODO: backward consistency only supported for single tensor outputs +# # TODO: backward consistency only checked on sample.input, not all +# # tensor inputs +# # TODO: update to handle checking grads of all tensor inputs as +# # derived from each tensor output +# if ( +# isinstance(expected_forward, torch.Tensor) +# and expected_forward.requires_grad +# ): +# output_process_fn_grad = sample.output_process_fn_grad or (lambda x: x) +# expected_forward = output_process_fn_grad(expected_forward) +# forward_with_mathview = output_process_fn_grad(forward_with_mathview) + +# tensor = ( +# sample.input +# if isinstance(sample.input, torch.Tensor) +# else sample.input[0] +# ) +# expected_forward.sum().abs().backward(retain_graph=True) +# forward_with_mathview.sum().abs().backward(retain_graph=True) +# if tensor.grad is not None: +# cloned1_tensor = ( +# cloned1 if isinstance(cloned1, torch.Tensor) else cloned1[0] +# ) +# self.assertEqual(tensor.grad, cloned1_tensor.grad) + +# tensor.grad, cloned1_tensor.grad = None, None + +# # a repeat of the above test if output is not complex valued +# if out_type(expected_forward): +# grad = torch.randn_like(expected_forward) +# expected_forward.backward(grad) +# forward_with_mathview.backward( +# math_op_view(math_op_physical(grad)) +# ) + +# self.assertEqual(tensor.grad, cloned1_tensor.grad) + +# @ops(ops_and_refs, allowed_dtypes=(torch.cfloat,)) +# def test_conj_view(self, device, dtype, op): +# if not op.test_conjugated_samples: +# self.skipTest("Operation doesn't support conjugated inputs.") +# math_op_physical = torch.conj_physical +# math_op_view = torch.conj +# _requires_grad = torch.cfloat in op.supported_backward_dtypes( +# torch.device(device).type +# ) +# is_bit_set = torch.is_conj +# samples = op.sample_inputs(device, dtype, requires_grad=_requires_grad) +# self._test_math_view( +# device, +# dtype, +# op, +# samples, +# math_op_physical, +# math_op_view, +# 
is_bit_set, +# torch.is_complex, +# ) + +# @ops(ops_and_refs, allowed_dtypes=(torch.double,)) +# def test_neg_view(self, device, dtype, op): +# if not op.test_neg_view: +# self.skipTest("Operation not tested with tensors with negative bit.") +# math_op_physical = torch.neg +# math_op_view = torch._neg_view +# is_bit_set = torch.is_neg +# samples = op.sample_inputs(device, dtype, requires_grad=op.supports_autograd) +# self._test_math_view( +# device, +# dtype, +# op, +# samples, +# math_op_physical, +# math_op_view, +# is_bit_set, +# lambda x: True, +# ) + +# @ops(ops_and_refs, allowed_dtypes=(torch.cdouble,)) +# def test_neg_conj_view(self, device, dtype, op): +# if not op.test_neg_view: +# self.skipTest("Operation not tested with tensors with negative bit.") +# if not op.test_conjugated_samples: +# self.skipTest("Operation doesn't support conjugated inputs.") + +# def math_op_physical(x): +# return -x.conj_physical() + +# def math_op_view(x): +# return torch._neg_view(x).conj() + +# def is_bit_set(x): +# return torch.is_neg(x) and torch.is_conj(x) + +# _requires_grad = dtype in op.supported_backward_dtypes( +# torch.device(device).type +# ) +# samples = op.sample_inputs(device, dtype, requires_grad=_requires_grad) +# # Only test one sample +# samples = itertools.islice(samples, 1) +# self._test_math_view( +# device, +# dtype, +# op, +# samples, +# math_op_physical, +# math_op_view, +# is_bit_set, +# torch.is_complex, +# ) + + +# # input strides and size may have been altered due to the result of an inplace op +# def check_inplace_view(func, input, rs, input_size, input_strides): +# if func is None: +# return +# # TODO: extend this test to test ops with multiple outputs and ops like native_batch_norm(_legit).out +# # which mutate not necessarily the first input. +# if isinstance(rs, torch.Tensor) and rs is input: +# unequal_size = rs.size() != input_size +# unequal_strides = rs.stride() != input_strides +# # resize_ should probably have inplace_view tag. 
Not adding the tag since it +# # breaks some codegen logic +# if unequal_size or unequal_strides: +# if isinstance(func, torch._ops.OpOverloadPacket): +# func = func.default +# # Reference: https://github.com/pytorch/pytorch/issues/78759 +# if func is not torch.ops.aten.resize_.default: +# # TODO: use self.assertIn when we have separate tests for each tag +# assert torch.Tag.inplace_view in func.tags + + +# # A mode that when enabled runs correctness checks to ensure +# # that operators have expected tags based on their input and +# # output tensor properties +# class TestTagsMode(TorchDispatchMode): +# def __torch_dispatch__(self, func, types, args=(), kwargs=None): +# if isinstance(args[0], torch.Tensor): +# old_size = args[0].size() +# old_stride = args[0].stride() +# rs = func(*args, **kwargs) +# check_inplace_view(func, args[0], rs, old_size, old_stride) +# else: +# rs = func(*args, **kwargs) +# return rs + + +# # Test to verify the correctness for tags in `tags.yaml`, also available for access through `torch.Tags` +# @unMarkDynamoStrictTest +# class TestTags(TestCase): +# @onlyCPU +# @ops(ops_and_refs, dtypes=OpDTypes.any_one) +# def test_tags(self, device, dtype, op): +# samples = op.sample_inputs(device, dtype, requires_grad=False) +# for sample in samples: +# # TODO: Test tags for ops that return a list of tensors +# input = sample.input +# if isinstance(input, torch.Tensor): +# old_size = input.size() +# old_stride = input.stride() +# with TestTagsMode(): +# rs = op(input, *sample.args, **sample.kwargs) +# # TODO: add test for aliases: https://github.com/pytorch/pytorch/issues/78761 +# aten_name = op.aten_name if op.aten_name is not None else op.name +# opoverloadpacket = getattr(torch.ops.aten, aten_name, None) +# check_inplace_view(opoverloadpacket, input, rs, old_size, old_stride) + + +# class TestSelfKwarg(TestCase): +# def test_self_kwargs(self): +# """Verify that we can call the aten ops with all kwargs even if the +# argument's name is "self" +# """ +# torch.ops.aten.reshape.default(self=torch.rand(1, 2), shape=[2]) +# torch.ops.aten.min.default(self=torch.rand(100)) + + +# @unMarkDynamoStrictTest +# class TestRefsOpsInfo(TestCase): +# import_paths = [ +# "_refs", +# "_refs.special", +# "_refs.nn.functional", +# "_refs.fft", +# "_refs._conversions", +# ] +# module_alls = [ +# (path, import_module(f"torch.{path}").__all__) for path in import_paths +# ] +# ref_ops_names = tuple( +# itertools.chain.from_iterable( +# [f"{path}.{op}" for op in module_all] for path, module_all in module_alls +# ) +# ) +# ref_db_names = {ref_op.name for ref_op in python_ref_db} + +# # TODO: References that do not have an entry in python_ref_db +# skip_ref_ops = { +# "_refs.alias", +# "_refs.bitwise_right_shift", +# "_refs.copy_to", +# "_refs.empty_permuted", +# "_refs.empty_strided", +# "_refs.equal", +# "_refs.full", +# "_refs.full_like", +# "_refs.is_complex", +# "_refs.to", +# "_refs.mvlgamma", +# "_refs.ones", +# "_refs.ones_like", +# "_refs.special.expit", +# "_refs.std_var", +# "_refs.swap_axes", +# "_refs.uniform", +# "_refs.scalar_tensor", +# "_refs.trunc_divide", +# "_refs.zero", +# "_refs.zeros", +# "_refs.zeros_like", +# "_refs.rfloordiv", +# "_refs.rtruediv", +# "_refs.rpow", +# # These should be tested with their out-of-place counterparts +# "_refs.index_add_", +# "_refs.index_copy_", +# "_refs.index_fill_", +# "_refs.native_group_norm", +# } + +# not_in_decomp_table = { +# # duplicated in _decomp and _refs +# "_refs.nn.functional.group_norm", +# "_refs.nn.functional.mse_loss", +# 
"_refs.floor_divide", +# # duplicated as refs do not have decent support for advanced indexing +# "_refs.index_copy", +# "_refs.index_copy_", +# "_refs.index_add", +# "_refs.index_add_", +# # these are not aten ops? +# "_refs._conversions.bfloat16", +# "_refs._conversions.bool", +# "_refs._conversions.byte", +# "_refs._conversions.char", +# "_refs._conversions.double", +# "_refs._conversions.float", +# "_refs._conversions.half", +# "_refs._conversions.int", +# "_refs._conversions.long", +# "_refs._conversions.short", +# "_refs._conversions.chalf", +# "_refs._conversions.cfloat", +# "_refs._conversions.cdouble", +# "_refs.broadcast_shapes", +# "_refs.broadcast_tensors", +# "_refs.mvlgamma", +# "_refs.nn.functional.layer_norm", +# "_refs.nn.functional.tanhshrink", +# "_refs.nn.functional.triplet_margin_loss", +# "_refs.rfloordiv", +# "_refs.rtruediv", +# "_refs.rpow", +# # CompositeImplicitAutograd +# "_refs.allclose", +# "_refs.atleast_1d", +# "_refs.atleast_2d", +# "_refs.atleast_3d", +# "_refs.broadcast_to", +# "_refs.chunk", +# "_refs.column_stack", +# "_refs.contiguous", +# "_refs.dsplit", +# "_refs.dstack", +# "_refs.fill", +# "_refs.fill_", +# "_refs.flatten", +# "_refs.fliplr", +# "_refs.flipud", +# "_refs.float_power", +# "_refs.hsplit", +# "_refs.hstack", +# "_refs.isclose", +# "_refs.isfinite", +# "_refs.isreal", +# "_refs.istft", +# "_refs.log_softmax", +# "_refs.movedim", +# "_refs.narrow", +# "_refs.nn.functional.dropout", +# "_refs.nn.functional.l1_loss", +# "_refs.nn.functional.smooth_l1_loss", +# "_refs.nn.functional.log_softmax", +# "_refs.nn.functional.poisson_nll_loss", +# "_refs.nn.functional.softmax", +# "_refs.nn.functional.softmin", +# "_refs.positive", +# "_refs.ravel", +# "_refs.reshape", +# "_refs.softmax", +# "_refs.special.expit", +# "_refs.special.log_softmax", +# "_refs.special.softmax", +# "_refs.square", +# "_refs.stft", +# "_refs.T", +# "_refs.take_along_dim", +# "_refs.tensor_split", +# "_refs.to", +# "_refs.true_divide", +# "_refs.trunc_divide", +# "_refs.vsplit", +# "_refs.vstack", +# "_refs.linalg.matrix_norm", +# "_refs.linalg.norm", +# "_refs.linalg.svd", +# "_refs.linalg.svdvals", +# "_refs.unflatten", +# "_refs.sum_to_size", +# # ref implementation missing kwargs +# "_refs.full_like", # missing "layout" +# "_refs.scalar_tensor", # missing "layout" +# # other +# "_refs.block_diag", # only refs._block_diag_iterable is in decomposition table +# "_refs.empty", # intentional; direct empty is faster and has less guards +# "_refs.empty_permuted", # intentional; direct empty is faster and has less guards +# "_refs.expand_as", +# "_refs.as_strided", # _prims._as_strided_meta: "reduce() of empty sequence with no initial value" +# "_refs.copy_to", # torch._C._jit_get_operation: No such operator aten::copy_to +# "_refs.equal", # 'bool' object has no attribute 'dtype' +# "_refs.conj", # Calls _prims.conj +# "_refs.real", +# "_refs.imag", +# "_refs.reshape_as", +# "_refs.view_as", +# "_refs.view_as_complex", # TorchInductor does not support complex at the moment. 
+# # the decompositions for these ops are slightly different +# # because of out handling +# "_refs.var_mean", +# "_refs.std_mean", +# "_refs.native_layer_norm", +# } + +# @parametrize("op", ref_ops_names) +# def test_refs_are_in_python_ref_db(self, op): +# inplace = op[-1] == "_" +# if op in self.skip_ref_ops: +# raise unittest.SkipTest(f"{op} does not have an entry in python_ref_db") +# elif inplace: +# self.assertNotIn( +# op, +# self.ref_db_names, +# msg=f"{op} is an in-place operation and should not have an OpInfo", +# ) +# else: +# # Intentionally don't use assertIn to avoid printing the +# # (very large) container +# self.assertTrue(op in self.ref_db_names, msg=f"{op} not in ref_db_names") + +# @parametrize("op", ref_ops_names) +# def test_refs_are_in_decomp_table(self, op): +# path = op.split(".") +# module_path = ".".join(path[:-1]) +# op_name = path[-1] +# op_impl = getattr(import_module(f"torch.{module_path}"), op_name) + +# if op in self.not_in_decomp_table: +# self.assertNotIn( +# op_impl, +# torch._decomp.decomposition_table.values(), +# f"Unexpectedly found {op} in torch._decomp.decomposition_table.values()", +# ) +# else: +# self.assertIn( +# op_impl, +# torch._decomp.decomposition_table.values(), +# f"Did not find {op} in torch._decomp.decomposition_table.values()", +# ) + + +# fake_skips = ( +# "aminmax", # failing input +# "cov", # aweights cannot be negtaive +# "istft", # window overlap add min: 0 +# "linalg.eigvals", # The tensor has a non-zero number of elements, but its data is not allocated yet +# "linalg.eigvalsh", # aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend +# "linalg.matrix_power", # Could not run 'aten::eye.m_out' with arguments from the 'Meta' backend +# # "linalg.pinv", # Could not run 'aten::pinv.out' with arguments from the 'Meta' backen +# "linalg.matrix_rank.hermitian", # Could not run 'aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend +# "linalg.pinv.hermitian", # tensor.mH is only supported on matrices or batches of matrices. 
Got 1-D tensor +# "linalg.solve", # Could not run 'aten::linalg_solve' with arguments from the 'Meta' backend +# "linalg.tensorsolve", # Could not run 'aten::linalg_solve' with arguments from the 'Meta' +# "lu_solve", # MALLOC ERROR: debug +# "multinomial", # Could not run 'aten::multinomial' with arguments from the 'Meta' backend +# "mvlgamma.mvlgamma_p_1", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend +# "mvlgamma.mvlgamma_p_3", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend +# "mvlgamma.mvlgamma_p_5", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend +# "nanmean", # logical_not() got an unexpected keyword argument 'out' +# "quantile", # quantile() q values must be in the range [0, 1] +# "nanquantile", # quantile() q values must be in the range [0, 1] +# "nn.functional.ctc_loss", # The tensor has a non-zero number of elements, but its data is not allocated yet +# "nn.functional.embedding_bag", # sometimes errors +# "nn.functional.nll_loss", # sometimes errors +# "nn.functional.max_pool1d", # The tensor has a non-zero number of elements +# "to_sparse", # Could not run 'aten::_to_sparse' with arguments from the 'Meta' backend +# "tensor_split", # The tensor has a non-zero number of elements, but its data is not allocated yet +# "repeat_interleave", # cannot repeat_interleave a meta tensor without output_size +# "sparse.sampled.addmm", # sparsity not supported +# # Can not infer total number of classes from meta. no way at present to throw DynamicOutputShapeException +# "nn.functional.one_hot", +# "narrow", # Fails only for one overload with DataDependentOutputException (hence skip). +# ) + +# fake_autocast_device_skips = defaultdict(dict) + +# # TODO: investigate/fix +# fake_autocast_device_skips["cpu"] = {"linalg.pinv"} + + +# dynamic_output_op_tests = ( +# "argwhere", +# "bincount", +# "combinations", +# "linalg.lstsq", +# "masked_select", +# "nonzero", +# "unique_consecutive", +# "unique", +# "linalg.lstsq.grad_oriented", +# ) + +# # Ops that have dynamic output shapes that we can handle when +# # allow_dynamic_shape_ops is True in fake tensor shape environment. 
+# supported_dynamic_output_op_tests = ( +# "nonzero", +# "unique", +# "repeat_interleave", +# "masked_select", +# ) + +# # some inputs invoke dynamic output shape operators, some do not +# sometimes_dynamic_output_op_test = ( +# "__getitem__", +# "index_select", +# ) + +# data_dependent_op_tests = ( +# "equal", +# "corrcoef", +# "nn.functional.gaussian_nll_loss", +# "allclose", +# ) + +# aliasing_failures = ("histogramdd",) + +# fake_backward_skips = { +# "linalg.cond", +# "linalg.matrix_norm", +# "linalg.norm", +# "linalg.svd", +# "linalg.svdvals", +# "pca_lowrank", +# "roll", +# "svd_lowrank", +# "sgn", +# } + +# fake_backward_xfails = {skip(s) for s in fake_backward_skips} | { +# xfail("fft.ihfftn"), # Mismatch in aten._conj_physical.default +# xfail("fft.ihfft2"), # Mismatch in aten._conj_physical.default +# skip("nn.functional.ctc_loss"), +# } + +# fake_autocast_backward_xfails = { +# skip("nn.functional.binary_cross_entropy"), +# skip("sparse.sampled_addmm"), +# skip("linalg.pinv"), +# skip("linalg.pinv", "hermitian"), +# skip("linalg.pinv", "singular"), +# skip("pinverse"), +# } + + +# @unMarkDynamoStrictTest +# class TestFakeTensor(TestCase): +# def setUp(self): +# # Turn on FakeTensor caching and cross-checking for these tests: +# cache_enabled = unittest.mock.patch( +# "torch._dynamo.config.fake_tensor_cache_enabled", True +# ) +# cache_enabled.start() +# self.addCleanup(cache_enabled.stop) + +# cache_crosscheck = unittest.mock.patch( +# "torch._dynamo.config.fake_tensor_cache_crosscheck_enabled", True +# ) +# cache_crosscheck.start() +# self.addCleanup(cache_crosscheck.stop) + +# def _test_fake_helper(self, device, dtype, op, context): +# name = op.name +# if op.variant_test_name: +# name += "." + op.variant_test_name +# if name in fake_skips or "sparse" in name or "jiterator" in name: +# self.skipTest("Skip failing test") + +# samples = op.sample_inputs(device, dtype, requires_grad=False) +# for sample in samples: +# mode = FakeTensorMode() + +# from torch.fx.experimental.symbolic_shapes import ShapeEnv + +# allow_dynamic_output_shape_shape_env = ShapeEnv( +# allow_dynamic_output_shape_ops=True +# ) + +# allow_dynamic_output_shape_mode = FakeTensorMode( +# shape_env=allow_dynamic_output_shape_shape_env +# ) + +# try: +# with context(): +# res = op(sample.input, *sample.args, **sample.kwargs) +# except Exception: +# continue + +# def run_with_fake_mode_and_verify(fake_mode, match_results=True): +# def map_to_fake(e): +# if isinstance(e, torch.Tensor): +# return fake_mode.from_tensor(e) +# else: +# return e + +# input = tree_map(map_to_fake, sample.input) +# args = tree_map(map_to_fake, sample.args) +# kwargs = tree_map(map_to_fake, sample.kwargs) + +# try: +# with context(): +# with fake_mode: +# res_fake = op(input, *args, **kwargs) + +# if not match_results: +# return + +# for fake_out, real_out in zip( +# pytree.tree_leaves(res_fake), pytree.tree_leaves(res) +# ): +# if not isinstance(fake_out, torch.Tensor): +# self.assertTrue(not isinstance(real_out, torch.Tensor)) +# self.assertEqual(fake_out, real_out) +# continue + +# self.assertTrue(isinstance(fake_out, FakeTensor)) +# # if you see a shape exception here, you may need to add +# # a `dynamic_output_shape` tag to an operator + +# # prims/decomps must correctly model strides, +# # see https://github.com/pytorch/pytorch/issues/78050#issuecomment-1253950325 +# prims.utils.compare_tensor_meta(fake_out, real_out, True) + +# if name not in aliasing_failures: +# fake_aliasing = outputs_alias_inputs( +# (input, args, kwargs), 
res_fake +# ) +# real_aliasing = outputs_alias_inputs( +# (sample.input, sample, args, sample.kwargs), res +# ) +# self.assertEqual(fake_aliasing, real_aliasing) + +# self.assertTrue( +# name not in dynamic_output_op_tests +# and name not in data_dependent_op_tests +# ) + +# except torch._subclasses.fake_tensor.UnsupportedFakeTensorException: +# pass +# except torch._subclasses.fake_tensor.UnsupportedOperatorException: +# pass +# except torch._subclasses.fake_tensor.DynamicOutputShapeException: +# self.assertTrue( +# name in dynamic_output_op_tests +# or name in sometimes_dynamic_output_op_test +# ) +# self.assertTrue( +# mode.shape_env is None +# or not mode.shape_env.allow_dynamic_output_shape_ops +# or name not in supported_dynamic_output_op_tests +# ) +# except torch._subclasses.fake_tensor.DataDependentOutputException: +# self.assertTrue(name in data_dependent_op_tests) + +# run_with_fake_mode_and_verify(mode) +# if name in supported_dynamic_output_op_tests: +# run_with_fake_mode_and_verify( +# allow_dynamic_output_shape_mode, match_results=False +# ) + +# @ops(op_db, dtypes=OpDTypes.any_one) +# def test_pointwise_ops(self, device, dtype, op): +# name = op.name +# if op.variant_test_name: +# name += "." + op.variant_test_name +# if name in fake_skips or "sparse" in name or "jiterator" in name: +# self.skipTest("Skip failing test") + +# test_self = self + +# class TestPointwiseMode(TorchDispatchMode): +# def __torch_dispatch__(self, func, types, args=(), kwargs=None): +# kwargs = kwargs or {} + +# out = func(*args, **kwargs) + +# if torch.Tag.pointwise in func.tags: +# shapes = [] +# for inp in pytree.arg_tree_leaves(*args, **kwargs): +# if isinstance(inp, torch.Tensor): +# shapes.append(inp.shape) + +# out_shape = torch._refs._broadcast_shapes(*shapes) + +# for out_elem in pytree.tree_leaves(out): +# if isinstance(out_elem, torch.Tensor): +# test_self.assertEqual(out_elem.shape, out_shape) + +# return out + +# samples = op.sample_inputs(device, dtype, requires_grad=False) +# for sample in samples: +# mode = FakeTensorMode() + +# def map_to_fake(e): +# if isinstance(e, torch.Tensor): +# return mode.from_tensor(e) +# else: +# return e + +# input = tree_map(map_to_fake, sample.input) +# args = tree_map(map_to_fake, sample.args) +# kwargs = tree_map(map_to_fake, sample.kwargs) + +# try: +# op(input, *args, **kwargs) +# except Exception as e: +# continue + +# with TestPointwiseMode(): +# with mode: +# op(input, *args, **kwargs) + +# @ops(op_db, dtypes=OpDTypes.any_one) +# def test_fake(self, device, dtype, op): +# self._test_fake_helper(device, dtype, op, contextlib.nullcontext) + +# @ops(op_db, dtypes=OpDTypes.any_one) +# def test_fake_autocast(self, device, dtype, op): +# if op.name in fake_autocast_device_skips[device]: +# self.skipTest("Skip failing test") +# context = ( +# torch.cuda.amp.autocast if device == "cuda" else torch.cpu.amp.autocast +# ) +# self._test_fake_helper(device, dtype, op, context) + +# def _test_fake_crossref_helper(self, device, dtype, op, context): +# samples = op.sample_inputs(device, dtype, requires_grad=True) + +# for iter, sample in enumerate(samples): +# args = [sample.input] + list(sample.args) +# kwargs = sample.kwargs + +# # skip these to speed up tests +# common_skip_ops = ( +# aten.detach.default, +# aten.empty_strided.default, +# aten.copy_.default, +# aten.is_same_size.default, +# ) + +# # TODO: enable check_aliasing, batch norm fails +# try: +# with torch._subclasses.CrossRefFakeMode( +# ignore_op_fn=lambda fn: fn in common_skip_ops, 
check_aliasing=True +# ): +# with warnings.catch_warnings(), context(), torch.autograd.set_multithreading_enabled( +# False +# ): +# composite_compliance.compute_expected_grads( +# op.get_op(), +# args, +# kwargs, +# sample.output_process_fn_grad, +# op.gradcheck_wrapper, +# ) +# except torch._subclasses.fake_tensor.UnsupportedOperatorException: +# pass + +# @onlyCUDA +# @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,)) +# @skipOps( +# "TestFakeTensor", "test_fake_crossref_backward_no_amp", fake_backward_xfails +# ) +# def test_fake_crossref_backward_no_amp(self, device, dtype, op): +# self._test_fake_crossref_helper(device, dtype, op, contextlib.nullcontext) + +# @onlyCUDA +# @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,)) +# @skipOps( +# "TestFakeTensor", +# "test_fake_crossref_backward_amp", +# fake_backward_xfails | fake_autocast_backward_xfails, +# ) +# def test_fake_crossref_backward_amp(self, device, dtype, op): +# self._test_fake_crossref_helper(device, dtype, op, torch.cuda.amp.autocast) + +# @ops([op for op in ops_and_refs if op.is_factory_function]) +# def test_strided_layout(self, device, dtype, op): +# samples = op.sample_inputs(device, dtype) +# for sample in samples: +# kwargs = sample.kwargs.copy() +# kwargs["layout"] = torch.strided +# strided_result = op(sample.input, *sample.args, **kwargs) +# self.assertEqual(strided_result.layout, torch.strided) + + +#instantiate_device_type_tests(TestCommon, globals(), only_for="xpu") +instantiate_device_type_tests(TestCompositeCompliance, globals(), only_for="xpu") +#instantiate_device_type_tests(TestMathBits, globals()) +#instantiate_device_type_tests(TestRefsOpsInfo, globals(), only_for="cpu") +#instantiate_device_type_tests(TestFakeTensor, globals()) +#instantiate_device_type_tests(TestTags, globals()) if __name__ == "__main__": TestCase._default_dtype_check_enabled = True + #import pdb + #pdb.set_trace() run_tests() diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 8c2a72390aca9..8d93fcc590b2c 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -10807,6 +10807,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_rhs_python_scalar=False, supports_fwgrad_bwgrad=True, rhs_make_tensor_kwargs=dict(exclude_zero=False), + skipXPU=False, skips=( # RuntimeError: "max_elementwise_cuda" not implemented for 'ComplexFloat' DecorateInfo(unittest.expectedFailure, @@ -10825,6 +10826,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_rhs_python_scalar=False, supports_fwgrad_bwgrad=True, rhs_make_tensor_kwargs=dict(exclude_zero=False), + skipXPU=False, skips=( # RuntimeError: "min_elementwise_cuda" not implemented for 'ComplexFloat' DecorateInfo(unittest.expectedFailure, @@ -10848,7 +10850,9 @@ def reference_flatten(input, start_dim=0, end_dim=-1): sample_inputs_sparse_csr_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_csr), sample_inputs_sparse_csc_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_csc), sample_inputs_sparse_bsr_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_bsr), - sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_bsc)), + sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_bsc), + skipXPU=False), + BinaryUfuncInfo('sub', # NumPy has no builtin reference for the alpha kwarg, but 
it is easy enough to emulate ref=lambda input, other, *, alpha=1: np.subtract(input, np.multiply(alpha, other)), @@ -10875,6 +10879,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): toleranceOverride({torch.chalf: tol(atol=5e-3, rtol=0)}), 'TestDecomp', 'test_quick', device_type='cpu'), ), + skipXPU=False, skips=( DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', @@ -11326,7 +11331,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ref=np.bitwise_not, dtypes=integral_types_and(torch.bool), operator_variant=operator.invert, - supports_autograd=False), + supports_autograd=False, + skipXPU=False), BinaryUfuncInfo('bitwise_left_shift', op=torch.bitwise_left_shift, dtypes=integral_types(), @@ -11456,6 +11462,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_forward_ad=True, supports_fwgrad_bwgrad=True, supports_out=False, + skipXPU=False, skips=( # TypeError: _copy_dispatcher() got an unexpected keyword argument 'memory_format' # (NumPy reference needs to be extended with memory_format) @@ -11497,6 +11504,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): assert_autodiffed=True, supports_forward_ad=True, supports_fwgrad_bwgrad=True, + skipXPU=False, skips=( # NNC appear to not handle boolean clamp DecorateInfo(unittest.expectedFailure, @@ -11567,6 +11575,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_view_as_real, test_conjugated_samples=False, + skipXPU=False, ), OpInfo('view_as_complex', dtypes=floating_types_and(torch.half), @@ -11575,6 +11584,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, test_neg_view=False, sample_inputs_func=sample_inputs_view_as_complex, + skipXPU=False, skips=( # RuntimeError: Tensor must have a last dimension with stride 1 DecorateInfo(unittest.expectedFailure, "TestCommon", "test_noncontiguous_samples"), @@ -11621,12 +11631,14 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ref=np.cos, dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16), + dtypesIfXPU=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16), assert_autodiffed=True, handles_large_floats=False, supports_forward_ad=True, supports_fwgrad_bwgrad=True, promotes_int_to_float=True, decorators=(precisionOverride({torch.bfloat16: 1e-2}),), + skipXPU=False, skips=( DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large', dtypes=(torch.cfloat, torch.cdouble,), device_type='cpu', active_if=IS_WINDOWS), @@ -11715,6 +11727,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.half, torch.bfloat16), supports_forward_ad=True, supports_fwgrad_bwgrad=True, + skipXPU=False, skips=( # cumsum does not handle correctly out= dtypes DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'), @@ -11783,6 +11796,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): variant_test_name='no_rounding_mode', dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), + dtypesIfXPU=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), # Runs very slowly on slow gradcheck - alternatively reduce input sizes gradcheck_fast_mode=True, supports_forward_ad=True, @@ -11790,7 +11804,9 @@ def 
reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, supports_two_python_scalars=True, assert_autodiffed=True, - rhs_make_tensor_kwargs=dict(exclude_zero=True),), + rhs_make_tensor_kwargs=dict(exclude_zero=True), + skipXPU=False,), + BinaryUfuncInfo('div', aliases=('divide',), variant_test_name='trunc_rounding', @@ -11807,6 +11823,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # See https://github.com/pytorch/pytorch/issues/111126 DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'), ), + skipXPU=False, skips=( # RuntimeError: MALFORMED INPUT: Unhandled node kind (in computeValue): aten::div DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_working'), @@ -11827,6 +11844,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # See https://github.com/pytorch/pytorch/issues/111126 DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'), ), + skipXPU=False, skips=( # RuntimeError: MALFORMED INPUT: Unhandled node kind (in computeValue): aten::div DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_working'), @@ -11930,6 +11948,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): always_returns_bool=True, supports_autograd=False, sample_inputs_func=sample_inputs_comparison_ops, + skipXPU=False, skips=( )), BinaryUfuncInfo('fmax', @@ -11956,12 +11975,14 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ref=np.fmod, dtypes=all_types_and(torch.float16, torch.bfloat16), dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16), + dtypesIfXPU=all_types_and(torch.float16, torch.bfloat16), # https://github.com/pytorch/pytorch/issues/80411 gradcheck_fast_mode=True, supports_forward_ad=True, supports_fwgrad_bwgrad=True, assert_autodiffed=None, rhs_make_tensor_kwargs={'exclude_zero': True}, + skipXPU=False, decorators=( DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_contig_vs_every_other', @@ -13008,6 +13029,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): inplace_operator_variant=operator.iand, supports_autograd=False, supports_one_python_scalar=True, + skipXPU=False, skips=( # RuntimeError: "bitwise_and_cuda" not implemented for 'Half' DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', @@ -13020,6 +13042,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): inplace_operator_variant=operator.ior, supports_autograd=False, supports_one_python_scalar=True, + skipXPU=False, skips=( # TODO: FIXME: RuntimeError: "bitwise_or_cuda" not implemented for 'Half' DecorateInfo(unittest.expectedFailure, @@ -13034,6 +13057,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): inplace_operator_variant=operator.ixor, supports_autograd=False, supports_one_python_scalar=True, + skipXPU=False, skips=( # TODO: FIXME: RuntimeError: "bitwise_xor_cuda" not implemented for 'Half' DecorateInfo(unittest.expectedFailure, @@ -13070,6 +13094,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=integral_types_and(), supports_autograd=False, supports_rhs_python_scalar=False, + skipXPU=False, skips=( DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', @@ -15384,6 +15409,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_forward_ad=True, supports_fwgrad_bwgrad=True, autodiff_nonfusible_nodes=["aten::gelu"], + skipXPU=False, skips=( # AssertionError: Tensor-likes are not close! 
# May not replicate in CI @@ -17231,6 +17257,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): sample_inputs_sparse_csc_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_csc), sample_inputs_sparse_bsr_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_bsr), sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_bsc), + skipXPU=False, skips=( )), OpInfo('ones_like', @@ -17397,6 +17424,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), supports_out=True, sample_inputs_func=sample_inputs_ones_zeros, + skipXPU=False, skips=( # Tests that assume input is a tensor or sequence of tensors DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'), @@ -17539,6 +17567,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), sample_inputs_func=sample_inputs_empty, supports_autograd=False, + skipXPU=False, skips=( DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"), # Empty tensor data is garbage so it's hard to make comparisons with it. @@ -17761,6 +17790,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_bernoulli, error_inputs_func=error_inputs_bernoulli, + skipXPU=False, skips=( # vmap: We do not yet support calling random operations inside of vmap DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_forward_mode_AD'), @@ -18065,6 +18095,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): check_batched_forward_grad=False, dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16), supports_out=False, + skipXPU=False, skips=( # JIT has issue when op is passed as lambda # AssertionError: JIT Test does not execute any logic @@ -18082,6 +18113,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), supports_out=False, supports_autograd=False, + skipXPU=False, skips=( # Cannot resize variables that require grad DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_dtypes'), @@ -18096,6 +18128,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), supports_out=False, supports_autograd=False, + skipXPU=False, skips=( # Cannot resize variables that require grad DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_dtypes'), diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index b56d44c3904bb..6e6e1c596fd48 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -389,8 +389,7 @@ def composite_fn(test, generic_cls, device_cls, old_parametrize_fn=old_parametrize_fn, new_parametrize_fn=new_parametrize_fn): old_tests = list(old_parametrize_fn(test, generic_cls, device_cls)) - import pdb - pdb.set_trace() + for (old_test, old_test_name, old_param_kwargs, old_dec_fn) in old_tests: for (new_test, new_test_name, new_param_kwargs, new_dec_fn) in \ new_parametrize_fn(old_test, generic_cls, device_cls): @@ -405,8 +404,7 @@ def composite_fn(test, generic_cls, device_cls, old_test_name) def merged_decorator_fn(param_kwargs, old_dec_fn=old_dec_fn, new_dec_fn=new_dec_fn): - import pdb - pdb.set_trace() + 
return list(old_dec_fn(param_kwargs)) + list(new_dec_fn(param_kwargs)) yield (new_test, merged_test_name, full_param_kwargs, merged_decorator_fn) diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index 02e430ed616c5..6ae58240ea75a 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -96,26 +96,20 @@ def __init__( self.dtypes = dtypes self.active_if = active_if - print("init decorators: {} {} {} {} {}".format(self.cls_name, self.test_name, self.device_type, self.dtypes, self.active_if)) - + # Validate dtypes if self.dtypes is not None: for dtype in self.dtypes: assert isinstance(dtype, torch.dtype) def is_active(self, cls_name, test_name, device_type, dtype, param_kwargs): - print("is_active: {} {} {} {} {} {}".format(self.decorators, self.active_if, - (self.cls_name is None or self.cls_name == cls_name), - (self.test_name is None or self.test_name == test_name), - (self.device_type is None or self.device_type == device_type), - (self.dtypes is None or dtype in self.dtypes))) - print("is_active details: {} {} {} {}".format(self.cls_name, cls_name, self.test_name, test_name)) + return ( self.active_if and (self.cls_name is None or self.cls_name == cls_name) and (self.test_name is None or self.test_name == test_name) and (self.device_type is None or self.device_type == device_type) - and (self.dtypes is None or G in self.dtypes) + and (self.dtypes is None or dtype in self.dtypes) # Support callables over kwargs to determine if the decorator is active. and ( self.active_if(param_kwargs) From f5cbd50068fae398c01c8e200e560118118fdbb7 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Sun, 12 May 2024 22:28:04 -0700 Subject: [PATCH 03/37] clean up code --- test/test_ops.py | 1699 ++++++++--------- .../_internal/common_methods_invocations.py | 1 - torch/testing/_internal/opinfo/core.py | 7 +- 3 files changed, 851 insertions(+), 856 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 4b665336c1a50..5a8d9bc461f8d 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -136,11 +136,11 @@ def reduction_dtype_filter(op): aten = torch.ops.aten -_xpu_computation_op_list = ["_refs.abs", "_refs.all", "item", "abs", "add", "_refs.fill"] -_xpu_computation_op_list = ["abs"] -_xpu_computation_ops = [ - op for op in ops_and_refs if op.name in _xpu_computation_op_list -] +# _xpu_computation_op_list = ["_refs.abs", "_refs.all", "item", "abs", "add", "_refs.fill"] +# _xpu_computation_op_list = ["abs"] +# _xpu_computation_ops = [ +# op for op in ops_and_refs if op.name in _xpu_computation_op_list +# ] # Tests that apply to all operators and aren't related to any particular # system @@ -309,8 +309,7 @@ def to_cpu(arg): return arg.to(device="cpu") return arg - #import pdb - #pdb.set_trace() + samples = op.reference_inputs(device, dtype) for sample in samples: @@ -1840,848 +1839,848 @@ def check_cow_input( allow_list=op.allow_cow_input_materialize_backward, ) -# @ops(op_db, allowed_dtypes=(torch.float,)) -# def test_view_replay(self, device, dtype, op): -# def _assert_match_metadata(a, b): -# self.assertEqual(a.size(), b.size()) -# self.assertEqual(a.stride(), b.stride()) -# self.assertEqual(a.storage_offset(), b.storage_offset()) -# self.assertEqual(a.device, b.device) -# self.assertEqual(a.dtype, b.dtype) - -# # ensure view replay is enabled -# with torch.autograd._force_original_view_tracking(True): -# for sample in op.sample_inputs(device, dtype, requires_grad=False): -# inp = sample.input -# outs = op(inp, 
*sample.args, **sample.kwargs) -# if not isinstance(outs, (tuple, List)): -# outs = [outs] - -# # for all outputs that are views of the input, we should be able to replay the -# # forward and reverse views via a functioning view_func() / rev_view_func(). -# for out in outs: -# if not ( -# isinstance(out, torch.Tensor) -# and out._is_view() -# and out._base is inp -# ): -# continue - -# # forward view_func -# new_inp = inp.clone() -# _assert_match_metadata(new_inp, inp) -# new_out = out._view_func_unsafe(new_inp) -# _assert_match_metadata(new_out, out) -# self.assertEqual(new_out, out) - -# # reverse view_func -# new_out = out.detach() -# new_inp = out._rev_view_func_unsafe(new_out) -# _assert_match_metadata(new_inp, inp) -# self.assertTrue(new_inp._is_view()) -# self.assertTrue(new_inp._base is new_out) - - -# @unMarkDynamoStrictTest -# class TestMathBits(TestCase): -# # Tests that -# # 1. The operator's output for physically conjugated/negated tensors and conjugate/negative view tensors -# # produces the same value -# # 2. The gradients are same in both cases mentioned in (1) -# # 3. If the operator's inplace variant is supported, tests that the inplace operation -# # produces the correct value when called on a conjugate/negative view tensor and that the output -# # has its conj/neg bit set to true -# # This test only runs for C -> R and C -> C functions -# # TODO: add tests for `R->C` functions -# # Note: This test runs for functions that take both tensors and tensorlists as input. -# def _test_math_view( -# self, -# device, -# dtype, -# op, -# samples, -# math_op_physical, -# math_op_view, -# is_bit_set, -# out_type, -# ): -# inplace_variant = op.inplace_variant - -# # helper function to clone and conjugate/negate the input if its a tensor -# # else clone the sequence and conjugate/negate the first element in the sequence -# # If a requires_grad argument is provided the tensor being conjugated/negated will -# # have its requires_grad set to that value. -# def clone_and_perform_view(input, **kwargs): -# if isinstance(input, torch.Tensor): -# requires_grad = kwargs.get("requires_grad", input.requires_grad) -# with torch.no_grad(): -# # Ensure view represents the original sample input -# input = math_op_physical(input) -# # Note: .conj() is not called under no_grad mode since it's not allowed to modify a -# # view created in no_grad mode. Here it's ok to do so, so as a workaround we call conj -# # before resetting the requires_grad field for input -# input = math_op_view(input) -# assert input.is_leaf -# return input.requires_grad_(requires_grad) - -# if isinstance(input, Sequence): -# out = list(map(clone_input_helper, input)) -# out[0] = clone_and_perform_view(out[0]) -# return tuple(out) - -# for sample in samples: -# tensor = ( -# sample.input -# if isinstance(sample.input, torch.Tensor) -# else sample.input[0] -# ) -# cloned1 = clone_and_perform_view(sample.input) - -# # Computes function forward value with a physically conjugated/negated tensor and -# # a conj/neg view tensor and verifies that the output in both case are equal. 
-# expected_forward = op(sample.input, *sample.args, **sample.kwargs) -# forward_with_mathview = op(cloned1, *sample.args, **sample.kwargs) -# self.assertEqual(expected_forward, forward_with_mathview) - -# # If the op has an inplace variant, and the input doesn't require broadcasting -# # and has the same dtype as output, verify that the inplace operation on a conjugated/negated -# # input produces correct output, and the output tensor has the conj/neg bit set to True -# if inplace_variant is not None and not sample.broadcasts_input: -# cloned2 = clone_and_perform_view(tensor, requires_grad=False) -# if ( -# isinstance(expected_forward, torch.Tensor) -# and expected_forward.dtype is tensor.dtype -# ): -# inplace_forward = inplace_variant( -# cloned2, *sample.args, **sample.kwargs -# ) -# self.assertTrue(is_bit_set(inplace_forward)) -# self.assertEqual(inplace_forward, expected_forward) - -# # TODO: backward consistency only supported for single tensor outputs -# # TODO: backward consistency only checked on sample.input, not all -# # tensor inputs -# # TODO: update to handle checking grads of all tensor inputs as -# # derived from each tensor output -# if ( -# isinstance(expected_forward, torch.Tensor) -# and expected_forward.requires_grad -# ): -# output_process_fn_grad = sample.output_process_fn_grad or (lambda x: x) -# expected_forward = output_process_fn_grad(expected_forward) -# forward_with_mathview = output_process_fn_grad(forward_with_mathview) - -# tensor = ( -# sample.input -# if isinstance(sample.input, torch.Tensor) -# else sample.input[0] -# ) -# expected_forward.sum().abs().backward(retain_graph=True) -# forward_with_mathview.sum().abs().backward(retain_graph=True) -# if tensor.grad is not None: -# cloned1_tensor = ( -# cloned1 if isinstance(cloned1, torch.Tensor) else cloned1[0] -# ) -# self.assertEqual(tensor.grad, cloned1_tensor.grad) - -# tensor.grad, cloned1_tensor.grad = None, None - -# # a repeat of the above test if output is not complex valued -# if out_type(expected_forward): -# grad = torch.randn_like(expected_forward) -# expected_forward.backward(grad) -# forward_with_mathview.backward( -# math_op_view(math_op_physical(grad)) -# ) - -# self.assertEqual(tensor.grad, cloned1_tensor.grad) - -# @ops(ops_and_refs, allowed_dtypes=(torch.cfloat,)) -# def test_conj_view(self, device, dtype, op): -# if not op.test_conjugated_samples: -# self.skipTest("Operation doesn't support conjugated inputs.") -# math_op_physical = torch.conj_physical -# math_op_view = torch.conj -# _requires_grad = torch.cfloat in op.supported_backward_dtypes( -# torch.device(device).type -# ) -# is_bit_set = torch.is_conj -# samples = op.sample_inputs(device, dtype, requires_grad=_requires_grad) -# self._test_math_view( -# device, -# dtype, -# op, -# samples, -# math_op_physical, -# math_op_view, -# is_bit_set, -# torch.is_complex, -# ) - -# @ops(ops_and_refs, allowed_dtypes=(torch.double,)) -# def test_neg_view(self, device, dtype, op): -# if not op.test_neg_view: -# self.skipTest("Operation not tested with tensors with negative bit.") -# math_op_physical = torch.neg -# math_op_view = torch._neg_view -# is_bit_set = torch.is_neg -# samples = op.sample_inputs(device, dtype, requires_grad=op.supports_autograd) -# self._test_math_view( -# device, -# dtype, -# op, -# samples, -# math_op_physical, -# math_op_view, -# is_bit_set, -# lambda x: True, -# ) - -# @ops(ops_and_refs, allowed_dtypes=(torch.cdouble,)) -# def test_neg_conj_view(self, device, dtype, op): -# if not op.test_neg_view: -# 
self.skipTest("Operation not tested with tensors with negative bit.") -# if not op.test_conjugated_samples: -# self.skipTest("Operation doesn't support conjugated inputs.") - -# def math_op_physical(x): -# return -x.conj_physical() - -# def math_op_view(x): -# return torch._neg_view(x).conj() - -# def is_bit_set(x): -# return torch.is_neg(x) and torch.is_conj(x) - -# _requires_grad = dtype in op.supported_backward_dtypes( -# torch.device(device).type -# ) -# samples = op.sample_inputs(device, dtype, requires_grad=_requires_grad) -# # Only test one sample -# samples = itertools.islice(samples, 1) -# self._test_math_view( -# device, -# dtype, -# op, -# samples, -# math_op_physical, -# math_op_view, -# is_bit_set, -# torch.is_complex, -# ) - - -# # input strides and size may have been altered due to the result of an inplace op -# def check_inplace_view(func, input, rs, input_size, input_strides): -# if func is None: -# return -# # TODO: extend this test to test ops with multiple outputs and ops like native_batch_norm(_legit).out -# # which mutate not necessarily the first input. -# if isinstance(rs, torch.Tensor) and rs is input: -# unequal_size = rs.size() != input_size -# unequal_strides = rs.stride() != input_strides -# # resize_ should probably have inplace_view tag. Not adding the tag since it -# # breaks some codegen logic -# if unequal_size or unequal_strides: -# if isinstance(func, torch._ops.OpOverloadPacket): -# func = func.default -# # Reference: https://github.com/pytorch/pytorch/issues/78759 -# if func is not torch.ops.aten.resize_.default: -# # TODO: use self.assertIn when we have separate tests for each tag -# assert torch.Tag.inplace_view in func.tags - - -# # A mode that when enabled runs correctness checks to ensure -# # that operators have expected tags based on their input and -# # output tensor properties -# class TestTagsMode(TorchDispatchMode): -# def __torch_dispatch__(self, func, types, args=(), kwargs=None): -# if isinstance(args[0], torch.Tensor): -# old_size = args[0].size() -# old_stride = args[0].stride() -# rs = func(*args, **kwargs) -# check_inplace_view(func, args[0], rs, old_size, old_stride) -# else: -# rs = func(*args, **kwargs) -# return rs - - -# # Test to verify the correctness for tags in `tags.yaml`, also available for access through `torch.Tags` -# @unMarkDynamoStrictTest -# class TestTags(TestCase): -# @onlyCPU -# @ops(ops_and_refs, dtypes=OpDTypes.any_one) -# def test_tags(self, device, dtype, op): -# samples = op.sample_inputs(device, dtype, requires_grad=False) -# for sample in samples: -# # TODO: Test tags for ops that return a list of tensors -# input = sample.input -# if isinstance(input, torch.Tensor): -# old_size = input.size() -# old_stride = input.stride() -# with TestTagsMode(): -# rs = op(input, *sample.args, **sample.kwargs) -# # TODO: add test for aliases: https://github.com/pytorch/pytorch/issues/78761 -# aten_name = op.aten_name if op.aten_name is not None else op.name -# opoverloadpacket = getattr(torch.ops.aten, aten_name, None) -# check_inplace_view(opoverloadpacket, input, rs, old_size, old_stride) - - -# class TestSelfKwarg(TestCase): -# def test_self_kwargs(self): -# """Verify that we can call the aten ops with all kwargs even if the -# argument's name is "self" -# """ -# torch.ops.aten.reshape.default(self=torch.rand(1, 2), shape=[2]) -# torch.ops.aten.min.default(self=torch.rand(100)) - - -# @unMarkDynamoStrictTest -# class TestRefsOpsInfo(TestCase): -# import_paths = [ -# "_refs", -# "_refs.special", -# "_refs.nn.functional", 
-# "_refs.fft", -# "_refs._conversions", -# ] -# module_alls = [ -# (path, import_module(f"torch.{path}").__all__) for path in import_paths -# ] -# ref_ops_names = tuple( -# itertools.chain.from_iterable( -# [f"{path}.{op}" for op in module_all] for path, module_all in module_alls -# ) -# ) -# ref_db_names = {ref_op.name for ref_op in python_ref_db} - -# # TODO: References that do not have an entry in python_ref_db -# skip_ref_ops = { -# "_refs.alias", -# "_refs.bitwise_right_shift", -# "_refs.copy_to", -# "_refs.empty_permuted", -# "_refs.empty_strided", -# "_refs.equal", -# "_refs.full", -# "_refs.full_like", -# "_refs.is_complex", -# "_refs.to", -# "_refs.mvlgamma", -# "_refs.ones", -# "_refs.ones_like", -# "_refs.special.expit", -# "_refs.std_var", -# "_refs.swap_axes", -# "_refs.uniform", -# "_refs.scalar_tensor", -# "_refs.trunc_divide", -# "_refs.zero", -# "_refs.zeros", -# "_refs.zeros_like", -# "_refs.rfloordiv", -# "_refs.rtruediv", -# "_refs.rpow", -# # These should be tested with their out-of-place counterparts -# "_refs.index_add_", -# "_refs.index_copy_", -# "_refs.index_fill_", -# "_refs.native_group_norm", -# } - -# not_in_decomp_table = { -# # duplicated in _decomp and _refs -# "_refs.nn.functional.group_norm", -# "_refs.nn.functional.mse_loss", -# "_refs.floor_divide", -# # duplicated as refs do not have decent support for advanced indexing -# "_refs.index_copy", -# "_refs.index_copy_", -# "_refs.index_add", -# "_refs.index_add_", -# # these are not aten ops? -# "_refs._conversions.bfloat16", -# "_refs._conversions.bool", -# "_refs._conversions.byte", -# "_refs._conversions.char", -# "_refs._conversions.double", -# "_refs._conversions.float", -# "_refs._conversions.half", -# "_refs._conversions.int", -# "_refs._conversions.long", -# "_refs._conversions.short", -# "_refs._conversions.chalf", -# "_refs._conversions.cfloat", -# "_refs._conversions.cdouble", -# "_refs.broadcast_shapes", -# "_refs.broadcast_tensors", -# "_refs.mvlgamma", -# "_refs.nn.functional.layer_norm", -# "_refs.nn.functional.tanhshrink", -# "_refs.nn.functional.triplet_margin_loss", -# "_refs.rfloordiv", -# "_refs.rtruediv", -# "_refs.rpow", -# # CompositeImplicitAutograd -# "_refs.allclose", -# "_refs.atleast_1d", -# "_refs.atleast_2d", -# "_refs.atleast_3d", -# "_refs.broadcast_to", -# "_refs.chunk", -# "_refs.column_stack", -# "_refs.contiguous", -# "_refs.dsplit", -# "_refs.dstack", -# "_refs.fill", -# "_refs.fill_", -# "_refs.flatten", -# "_refs.fliplr", -# "_refs.flipud", -# "_refs.float_power", -# "_refs.hsplit", -# "_refs.hstack", -# "_refs.isclose", -# "_refs.isfinite", -# "_refs.isreal", -# "_refs.istft", -# "_refs.log_softmax", -# "_refs.movedim", -# "_refs.narrow", -# "_refs.nn.functional.dropout", -# "_refs.nn.functional.l1_loss", -# "_refs.nn.functional.smooth_l1_loss", -# "_refs.nn.functional.log_softmax", -# "_refs.nn.functional.poisson_nll_loss", -# "_refs.nn.functional.softmax", -# "_refs.nn.functional.softmin", -# "_refs.positive", -# "_refs.ravel", -# "_refs.reshape", -# "_refs.softmax", -# "_refs.special.expit", -# "_refs.special.log_softmax", -# "_refs.special.softmax", -# "_refs.square", -# "_refs.stft", -# "_refs.T", -# "_refs.take_along_dim", -# "_refs.tensor_split", -# "_refs.to", -# "_refs.true_divide", -# "_refs.trunc_divide", -# "_refs.vsplit", -# "_refs.vstack", -# "_refs.linalg.matrix_norm", -# "_refs.linalg.norm", -# "_refs.linalg.svd", -# "_refs.linalg.svdvals", -# "_refs.unflatten", -# "_refs.sum_to_size", -# # ref implementation missing kwargs -# "_refs.full_like", # 
missing "layout" -# "_refs.scalar_tensor", # missing "layout" -# # other -# "_refs.block_diag", # only refs._block_diag_iterable is in decomposition table -# "_refs.empty", # intentional; direct empty is faster and has less guards -# "_refs.empty_permuted", # intentional; direct empty is faster and has less guards -# "_refs.expand_as", -# "_refs.as_strided", # _prims._as_strided_meta: "reduce() of empty sequence with no initial value" -# "_refs.copy_to", # torch._C._jit_get_operation: No such operator aten::copy_to -# "_refs.equal", # 'bool' object has no attribute 'dtype' -# "_refs.conj", # Calls _prims.conj -# "_refs.real", -# "_refs.imag", -# "_refs.reshape_as", -# "_refs.view_as", -# "_refs.view_as_complex", # TorchInductor does not support complex at the moment. -# # the decompositions for these ops are slightly different -# # because of out handling -# "_refs.var_mean", -# "_refs.std_mean", -# "_refs.native_layer_norm", -# } - -# @parametrize("op", ref_ops_names) -# def test_refs_are_in_python_ref_db(self, op): -# inplace = op[-1] == "_" -# if op in self.skip_ref_ops: -# raise unittest.SkipTest(f"{op} does not have an entry in python_ref_db") -# elif inplace: -# self.assertNotIn( -# op, -# self.ref_db_names, -# msg=f"{op} is an in-place operation and should not have an OpInfo", -# ) -# else: -# # Intentionally don't use assertIn to avoid printing the -# # (very large) container -# self.assertTrue(op in self.ref_db_names, msg=f"{op} not in ref_db_names") - -# @parametrize("op", ref_ops_names) -# def test_refs_are_in_decomp_table(self, op): -# path = op.split(".") -# module_path = ".".join(path[:-1]) -# op_name = path[-1] -# op_impl = getattr(import_module(f"torch.{module_path}"), op_name) - -# if op in self.not_in_decomp_table: -# self.assertNotIn( -# op_impl, -# torch._decomp.decomposition_table.values(), -# f"Unexpectedly found {op} in torch._decomp.decomposition_table.values()", -# ) -# else: -# self.assertIn( -# op_impl, -# torch._decomp.decomposition_table.values(), -# f"Did not find {op} in torch._decomp.decomposition_table.values()", -# ) - - -# fake_skips = ( -# "aminmax", # failing input -# "cov", # aweights cannot be negtaive -# "istft", # window overlap add min: 0 -# "linalg.eigvals", # The tensor has a non-zero number of elements, but its data is not allocated yet -# "linalg.eigvalsh", # aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend -# "linalg.matrix_power", # Could not run 'aten::eye.m_out' with arguments from the 'Meta' backend -# # "linalg.pinv", # Could not run 'aten::pinv.out' with arguments from the 'Meta' backen -# "linalg.matrix_rank.hermitian", # Could not run 'aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend -# "linalg.pinv.hermitian", # tensor.mH is only supported on matrices or batches of matrices. 
Got 1-D tensor -# "linalg.solve", # Could not run 'aten::linalg_solve' with arguments from the 'Meta' backend -# "linalg.tensorsolve", # Could not run 'aten::linalg_solve' with arguments from the 'Meta' -# "lu_solve", # MALLOC ERROR: debug -# "multinomial", # Could not run 'aten::multinomial' with arguments from the 'Meta' backend -# "mvlgamma.mvlgamma_p_1", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend -# "mvlgamma.mvlgamma_p_3", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend -# "mvlgamma.mvlgamma_p_5", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend -# "nanmean", # logical_not() got an unexpected keyword argument 'out' -# "quantile", # quantile() q values must be in the range [0, 1] -# "nanquantile", # quantile() q values must be in the range [0, 1] -# "nn.functional.ctc_loss", # The tensor has a non-zero number of elements, but its data is not allocated yet -# "nn.functional.embedding_bag", # sometimes errors -# "nn.functional.nll_loss", # sometimes errors -# "nn.functional.max_pool1d", # The tensor has a non-zero number of elements -# "to_sparse", # Could not run 'aten::_to_sparse' with arguments from the 'Meta' backend -# "tensor_split", # The tensor has a non-zero number of elements, but its data is not allocated yet -# "repeat_interleave", # cannot repeat_interleave a meta tensor without output_size -# "sparse.sampled.addmm", # sparsity not supported -# # Can not infer total number of classes from meta. no way at present to throw DynamicOutputShapeException -# "nn.functional.one_hot", -# "narrow", # Fails only for one overload with DataDependentOutputException (hence skip). -# ) - -# fake_autocast_device_skips = defaultdict(dict) - -# # TODO: investigate/fix -# fake_autocast_device_skips["cpu"] = {"linalg.pinv"} - - -# dynamic_output_op_tests = ( -# "argwhere", -# "bincount", -# "combinations", -# "linalg.lstsq", -# "masked_select", -# "nonzero", -# "unique_consecutive", -# "unique", -# "linalg.lstsq.grad_oriented", -# ) - -# # Ops that have dynamic output shapes that we can handle when -# # allow_dynamic_shape_ops is True in fake tensor shape environment. 
-# supported_dynamic_output_op_tests = ( -# "nonzero", -# "unique", -# "repeat_interleave", -# "masked_select", -# ) - -# # some inputs invoke dynamic output shape operators, some do not -# sometimes_dynamic_output_op_test = ( -# "__getitem__", -# "index_select", -# ) - -# data_dependent_op_tests = ( -# "equal", -# "corrcoef", -# "nn.functional.gaussian_nll_loss", -# "allclose", -# ) - -# aliasing_failures = ("histogramdd",) - -# fake_backward_skips = { -# "linalg.cond", -# "linalg.matrix_norm", -# "linalg.norm", -# "linalg.svd", -# "linalg.svdvals", -# "pca_lowrank", -# "roll", -# "svd_lowrank", -# "sgn", -# } - -# fake_backward_xfails = {skip(s) for s in fake_backward_skips} | { -# xfail("fft.ihfftn"), # Mismatch in aten._conj_physical.default -# xfail("fft.ihfft2"), # Mismatch in aten._conj_physical.default -# skip("nn.functional.ctc_loss"), -# } - -# fake_autocast_backward_xfails = { -# skip("nn.functional.binary_cross_entropy"), -# skip("sparse.sampled_addmm"), -# skip("linalg.pinv"), -# skip("linalg.pinv", "hermitian"), -# skip("linalg.pinv", "singular"), -# skip("pinverse"), -# } - - -# @unMarkDynamoStrictTest -# class TestFakeTensor(TestCase): -# def setUp(self): -# # Turn on FakeTensor caching and cross-checking for these tests: -# cache_enabled = unittest.mock.patch( -# "torch._dynamo.config.fake_tensor_cache_enabled", True -# ) -# cache_enabled.start() -# self.addCleanup(cache_enabled.stop) - -# cache_crosscheck = unittest.mock.patch( -# "torch._dynamo.config.fake_tensor_cache_crosscheck_enabled", True -# ) -# cache_crosscheck.start() -# self.addCleanup(cache_crosscheck.stop) - -# def _test_fake_helper(self, device, dtype, op, context): -# name = op.name -# if op.variant_test_name: -# name += "." + op.variant_test_name -# if name in fake_skips or "sparse" in name or "jiterator" in name: -# self.skipTest("Skip failing test") - -# samples = op.sample_inputs(device, dtype, requires_grad=False) -# for sample in samples: -# mode = FakeTensorMode() - -# from torch.fx.experimental.symbolic_shapes import ShapeEnv - -# allow_dynamic_output_shape_shape_env = ShapeEnv( -# allow_dynamic_output_shape_ops=True -# ) - -# allow_dynamic_output_shape_mode = FakeTensorMode( -# shape_env=allow_dynamic_output_shape_shape_env -# ) - -# try: -# with context(): -# res = op(sample.input, *sample.args, **sample.kwargs) -# except Exception: -# continue - -# def run_with_fake_mode_and_verify(fake_mode, match_results=True): -# def map_to_fake(e): -# if isinstance(e, torch.Tensor): -# return fake_mode.from_tensor(e) -# else: -# return e - -# input = tree_map(map_to_fake, sample.input) -# args = tree_map(map_to_fake, sample.args) -# kwargs = tree_map(map_to_fake, sample.kwargs) - -# try: -# with context(): -# with fake_mode: -# res_fake = op(input, *args, **kwargs) - -# if not match_results: -# return - -# for fake_out, real_out in zip( -# pytree.tree_leaves(res_fake), pytree.tree_leaves(res) -# ): -# if not isinstance(fake_out, torch.Tensor): -# self.assertTrue(not isinstance(real_out, torch.Tensor)) -# self.assertEqual(fake_out, real_out) -# continue - -# self.assertTrue(isinstance(fake_out, FakeTensor)) -# # if you see a shape exception here, you may need to add -# # a `dynamic_output_shape` tag to an operator - -# # prims/decomps must correctly model strides, -# # see https://github.com/pytorch/pytorch/issues/78050#issuecomment-1253950325 -# prims.utils.compare_tensor_meta(fake_out, real_out, True) - -# if name not in aliasing_failures: -# fake_aliasing = outputs_alias_inputs( -# (input, args, kwargs), 
res_fake -# ) -# real_aliasing = outputs_alias_inputs( -# (sample.input, sample, args, sample.kwargs), res -# ) -# self.assertEqual(fake_aliasing, real_aliasing) - -# self.assertTrue( -# name not in dynamic_output_op_tests -# and name not in data_dependent_op_tests -# ) - -# except torch._subclasses.fake_tensor.UnsupportedFakeTensorException: -# pass -# except torch._subclasses.fake_tensor.UnsupportedOperatorException: -# pass -# except torch._subclasses.fake_tensor.DynamicOutputShapeException: -# self.assertTrue( -# name in dynamic_output_op_tests -# or name in sometimes_dynamic_output_op_test -# ) -# self.assertTrue( -# mode.shape_env is None -# or not mode.shape_env.allow_dynamic_output_shape_ops -# or name not in supported_dynamic_output_op_tests -# ) -# except torch._subclasses.fake_tensor.DataDependentOutputException: -# self.assertTrue(name in data_dependent_op_tests) - -# run_with_fake_mode_and_verify(mode) -# if name in supported_dynamic_output_op_tests: -# run_with_fake_mode_and_verify( -# allow_dynamic_output_shape_mode, match_results=False -# ) - -# @ops(op_db, dtypes=OpDTypes.any_one) -# def test_pointwise_ops(self, device, dtype, op): -# name = op.name -# if op.variant_test_name: -# name += "." + op.variant_test_name -# if name in fake_skips or "sparse" in name or "jiterator" in name: -# self.skipTest("Skip failing test") - -# test_self = self - -# class TestPointwiseMode(TorchDispatchMode): -# def __torch_dispatch__(self, func, types, args=(), kwargs=None): -# kwargs = kwargs or {} - -# out = func(*args, **kwargs) - -# if torch.Tag.pointwise in func.tags: -# shapes = [] -# for inp in pytree.arg_tree_leaves(*args, **kwargs): -# if isinstance(inp, torch.Tensor): -# shapes.append(inp.shape) - -# out_shape = torch._refs._broadcast_shapes(*shapes) - -# for out_elem in pytree.tree_leaves(out): -# if isinstance(out_elem, torch.Tensor): -# test_self.assertEqual(out_elem.shape, out_shape) - -# return out - -# samples = op.sample_inputs(device, dtype, requires_grad=False) -# for sample in samples: -# mode = FakeTensorMode() - -# def map_to_fake(e): -# if isinstance(e, torch.Tensor): -# return mode.from_tensor(e) -# else: -# return e - -# input = tree_map(map_to_fake, sample.input) -# args = tree_map(map_to_fake, sample.args) -# kwargs = tree_map(map_to_fake, sample.kwargs) - -# try: -# op(input, *args, **kwargs) -# except Exception as e: -# continue - -# with TestPointwiseMode(): -# with mode: -# op(input, *args, **kwargs) - -# @ops(op_db, dtypes=OpDTypes.any_one) -# def test_fake(self, device, dtype, op): -# self._test_fake_helper(device, dtype, op, contextlib.nullcontext) - -# @ops(op_db, dtypes=OpDTypes.any_one) -# def test_fake_autocast(self, device, dtype, op): -# if op.name in fake_autocast_device_skips[device]: -# self.skipTest("Skip failing test") -# context = ( -# torch.cuda.amp.autocast if device == "cuda" else torch.cpu.amp.autocast -# ) -# self._test_fake_helper(device, dtype, op, context) - -# def _test_fake_crossref_helper(self, device, dtype, op, context): -# samples = op.sample_inputs(device, dtype, requires_grad=True) - -# for iter, sample in enumerate(samples): -# args = [sample.input] + list(sample.args) -# kwargs = sample.kwargs - -# # skip these to speed up tests -# common_skip_ops = ( -# aten.detach.default, -# aten.empty_strided.default, -# aten.copy_.default, -# aten.is_same_size.default, -# ) - -# # TODO: enable check_aliasing, batch norm fails -# try: -# with torch._subclasses.CrossRefFakeMode( -# ignore_op_fn=lambda fn: fn in common_skip_ops, 
check_aliasing=True -# ): -# with warnings.catch_warnings(), context(), torch.autograd.set_multithreading_enabled( -# False -# ): -# composite_compliance.compute_expected_grads( -# op.get_op(), -# args, -# kwargs, -# sample.output_process_fn_grad, -# op.gradcheck_wrapper, -# ) -# except torch._subclasses.fake_tensor.UnsupportedOperatorException: -# pass - -# @onlyCUDA -# @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,)) -# @skipOps( -# "TestFakeTensor", "test_fake_crossref_backward_no_amp", fake_backward_xfails -# ) -# def test_fake_crossref_backward_no_amp(self, device, dtype, op): -# self._test_fake_crossref_helper(device, dtype, op, contextlib.nullcontext) - -# @onlyCUDA -# @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,)) -# @skipOps( -# "TestFakeTensor", -# "test_fake_crossref_backward_amp", -# fake_backward_xfails | fake_autocast_backward_xfails, -# ) -# def test_fake_crossref_backward_amp(self, device, dtype, op): -# self._test_fake_crossref_helper(device, dtype, op, torch.cuda.amp.autocast) - -# @ops([op for op in ops_and_refs if op.is_factory_function]) -# def test_strided_layout(self, device, dtype, op): -# samples = op.sample_inputs(device, dtype) -# for sample in samples: -# kwargs = sample.kwargs.copy() -# kwargs["layout"] = torch.strided -# strided_result = op(sample.input, *sample.args, **kwargs) -# self.assertEqual(strided_result.layout, torch.strided) + @ops(op_db, allowed_dtypes=(torch.float,)) + def test_view_replay(self, device, dtype, op): + def _assert_match_metadata(a, b): + self.assertEqual(a.size(), b.size()) + self.assertEqual(a.stride(), b.stride()) + self.assertEqual(a.storage_offset(), b.storage_offset()) + self.assertEqual(a.device, b.device) + self.assertEqual(a.dtype, b.dtype) + + # ensure view replay is enabled + with torch.autograd._force_original_view_tracking(True): + for sample in op.sample_inputs(device, dtype, requires_grad=False): + inp = sample.input + outs = op(inp, *sample.args, **sample.kwargs) + if not isinstance(outs, (tuple, List)): + outs = [outs] + + # for all outputs that are views of the input, we should be able to replay the + # forward and reverse views via a functioning view_func() / rev_view_func(). + for out in outs: + if not ( + isinstance(out, torch.Tensor) + and out._is_view() + and out._base is inp + ): + continue + + # forward view_func + new_inp = inp.clone() + _assert_match_metadata(new_inp, inp) + new_out = out._view_func_unsafe(new_inp) + _assert_match_metadata(new_out, out) + self.assertEqual(new_out, out) + + # reverse view_func + new_out = out.detach() + new_inp = out._rev_view_func_unsafe(new_out) + _assert_match_metadata(new_inp, inp) + self.assertTrue(new_inp._is_view()) + self.assertTrue(new_inp._base is new_out) + + +@unMarkDynamoStrictTest +class TestMathBits(TestCase): + # Tests that + # 1. The operator's output for physically conjugated/negated tensors and conjugate/negative view tensors + # produces the same value + # 2. The gradients are same in both cases mentioned in (1) + # 3. If the operator's inplace variant is supported, tests that the inplace operation + # produces the correct value when called on a conjugate/negative view tensor and that the output + # has its conj/neg bit set to true + # This test only runs for C -> R and C -> C functions + # TODO: add tests for `R->C` functions + # Note: This test runs for functions that take both tensors and tensorlists as input. 
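
A minimal standalone sketch of the conjugate/negative view bits that the math-view tests below rely on (illustrative only; it is not drawn from the patch itself):

    import torch

    z = torch.randn(3, dtype=torch.cfloat)
    lazy = z.conj()                  # lazy: sets the conj bit, shares storage with z
    eager = z.conj_physical()        # eager: materializes the conjugated values

    assert lazy.is_conj() and not eager.is_conj()
    assert torch.equal(lazy, eager)  # both resolve to the same values
    neg = torch._neg_view(z)         # analogous lazy negation bit
    assert neg.is_neg()
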
+ def _test_math_view( + self, + device, + dtype, + op, + samples, + math_op_physical, + math_op_view, + is_bit_set, + out_type, + ): + inplace_variant = op.inplace_variant + + # helper function to clone and conjugate/negate the input if its a tensor + # else clone the sequence and conjugate/negate the first element in the sequence + # If a requires_grad argument is provided the tensor being conjugated/negated will + # have its requires_grad set to that value. + def clone_and_perform_view(input, **kwargs): + if isinstance(input, torch.Tensor): + requires_grad = kwargs.get("requires_grad", input.requires_grad) + with torch.no_grad(): + # Ensure view represents the original sample input + input = math_op_physical(input) + # Note: .conj() is not called under no_grad mode since it's not allowed to modify a + # view created in no_grad mode. Here it's ok to do so, so as a workaround we call conj + # before resetting the requires_grad field for input + input = math_op_view(input) + assert input.is_leaf + return input.requires_grad_(requires_grad) + + if isinstance(input, Sequence): + out = list(map(clone_input_helper, input)) + out[0] = clone_and_perform_view(out[0]) + return tuple(out) + + for sample in samples: + tensor = ( + sample.input + if isinstance(sample.input, torch.Tensor) + else sample.input[0] + ) + cloned1 = clone_and_perform_view(sample.input) + + # Computes function forward value with a physically conjugated/negated tensor and + # a conj/neg view tensor and verifies that the output in both case are equal. + expected_forward = op(sample.input, *sample.args, **sample.kwargs) + forward_with_mathview = op(cloned1, *sample.args, **sample.kwargs) + self.assertEqual(expected_forward, forward_with_mathview) + + # If the op has an inplace variant, and the input doesn't require broadcasting + # and has the same dtype as output, verify that the inplace operation on a conjugated/negated + # input produces correct output, and the output tensor has the conj/neg bit set to True + if inplace_variant is not None and not sample.broadcasts_input: + cloned2 = clone_and_perform_view(tensor, requires_grad=False) + if ( + isinstance(expected_forward, torch.Tensor) + and expected_forward.dtype is tensor.dtype + ): + inplace_forward = inplace_variant( + cloned2, *sample.args, **sample.kwargs + ) + self.assertTrue(is_bit_set(inplace_forward)) + self.assertEqual(inplace_forward, expected_forward) + + # TODO: backward consistency only supported for single tensor outputs + # TODO: backward consistency only checked on sample.input, not all + # tensor inputs + # TODO: update to handle checking grads of all tensor inputs as + # derived from each tensor output + if ( + isinstance(expected_forward, torch.Tensor) + and expected_forward.requires_grad + ): + output_process_fn_grad = sample.output_process_fn_grad or (lambda x: x) + expected_forward = output_process_fn_grad(expected_forward) + forward_with_mathview = output_process_fn_grad(forward_with_mathview) + + tensor = ( + sample.input + if isinstance(sample.input, torch.Tensor) + else sample.input[0] + ) + expected_forward.sum().abs().backward(retain_graph=True) + forward_with_mathview.sum().abs().backward(retain_graph=True) + if tensor.grad is not None: + cloned1_tensor = ( + cloned1 if isinstance(cloned1, torch.Tensor) else cloned1[0] + ) + self.assertEqual(tensor.grad, cloned1_tensor.grad) + + tensor.grad, cloned1_tensor.grad = None, None + + # a repeat of the above test if output is not complex valued + if out_type(expected_forward): + grad = 
torch.randn_like(expected_forward) + expected_forward.backward(grad) + forward_with_mathview.backward( + math_op_view(math_op_physical(grad)) + ) + + self.assertEqual(tensor.grad, cloned1_tensor.grad) + + @ops(ops_and_refs, allowed_dtypes=(torch.cfloat,)) + def test_conj_view(self, device, dtype, op): + if not op.test_conjugated_samples: + self.skipTest("Operation doesn't support conjugated inputs.") + math_op_physical = torch.conj_physical + math_op_view = torch.conj + _requires_grad = torch.cfloat in op.supported_backward_dtypes( + torch.device(device).type + ) + is_bit_set = torch.is_conj + samples = op.sample_inputs(device, dtype, requires_grad=_requires_grad) + self._test_math_view( + device, + dtype, + op, + samples, + math_op_physical, + math_op_view, + is_bit_set, + torch.is_complex, + ) + + @ops(ops_and_refs, allowed_dtypes=(torch.double,)) + def test_neg_view(self, device, dtype, op): + if not op.test_neg_view: + self.skipTest("Operation not tested with tensors with negative bit.") + math_op_physical = torch.neg + math_op_view = torch._neg_view + is_bit_set = torch.is_neg + samples = op.sample_inputs(device, dtype, requires_grad=op.supports_autograd) + self._test_math_view( + device, + dtype, + op, + samples, + math_op_physical, + math_op_view, + is_bit_set, + lambda x: True, + ) + + @ops(ops_and_refs, allowed_dtypes=(torch.cdouble,)) + def test_neg_conj_view(self, device, dtype, op): + if not op.test_neg_view: + self.skipTest("Operation not tested with tensors with negative bit.") + if not op.test_conjugated_samples: + self.skipTest("Operation doesn't support conjugated inputs.") + + def math_op_physical(x): + return -x.conj_physical() + + def math_op_view(x): + return torch._neg_view(x).conj() + + def is_bit_set(x): + return torch.is_neg(x) and torch.is_conj(x) + + _requires_grad = dtype in op.supported_backward_dtypes( + torch.device(device).type + ) + samples = op.sample_inputs(device, dtype, requires_grad=_requires_grad) + # Only test one sample + samples = itertools.islice(samples, 1) + self._test_math_view( + device, + dtype, + op, + samples, + math_op_physical, + math_op_view, + is_bit_set, + torch.is_complex, + ) + + +# input strides and size may have been altered due to the result of an inplace op +def check_inplace_view(func, input, rs, input_size, input_strides): + if func is None: + return + # TODO: extend this test to test ops with multiple outputs and ops like native_batch_norm(_legit).out + # which mutate not necessarily the first input. + if isinstance(rs, torch.Tensor) and rs is input: + unequal_size = rs.size() != input_size + unequal_strides = rs.stride() != input_strides + # resize_ should probably have inplace_view tag. 
Not adding the tag since it + # breaks some codegen logic + if unequal_size or unequal_strides: + if isinstance(func, torch._ops.OpOverloadPacket): + func = func.default + # Reference: https://github.com/pytorch/pytorch/issues/78759 + if func is not torch.ops.aten.resize_.default: + # TODO: use self.assertIn when we have separate tests for each tag + assert torch.Tag.inplace_view in func.tags + + +# A mode that when enabled runs correctness checks to ensure +# that operators have expected tags based on their input and +# output tensor properties +class TestTagsMode(TorchDispatchMode): + def __torch_dispatch__(self, func, types, args=(), kwargs=None): + if isinstance(args[0], torch.Tensor): + old_size = args[0].size() + old_stride = args[0].stride() + rs = func(*args, **kwargs) + check_inplace_view(func, args[0], rs, old_size, old_stride) + else: + rs = func(*args, **kwargs) + return rs + + +# Test to verify the correctness for tags in `tags.yaml`, also available for access through `torch.Tags` +@unMarkDynamoStrictTest +class TestTags(TestCase): + @onlyCPU + @ops(ops_and_refs, dtypes=OpDTypes.any_one) + def test_tags(self, device, dtype, op): + samples = op.sample_inputs(device, dtype, requires_grad=False) + for sample in samples: + # TODO: Test tags for ops that return a list of tensors + input = sample.input + if isinstance(input, torch.Tensor): + old_size = input.size() + old_stride = input.stride() + with TestTagsMode(): + rs = op(input, *sample.args, **sample.kwargs) + # TODO: add test for aliases: https://github.com/pytorch/pytorch/issues/78761 + aten_name = op.aten_name if op.aten_name is not None else op.name + opoverloadpacket = getattr(torch.ops.aten, aten_name, None) + check_inplace_view(opoverloadpacket, input, rs, old_size, old_stride) + + +class TestSelfKwarg(TestCase): + def test_self_kwargs(self): + """Verify that we can call the aten ops with all kwargs even if the + argument's name is "self" + """ + torch.ops.aten.reshape.default(self=torch.rand(1, 2), shape=[2]) + torch.ops.aten.min.default(self=torch.rand(100)) + + +@unMarkDynamoStrictTest +class TestRefsOpsInfo(TestCase): + import_paths = [ + "_refs", + "_refs.special", + "_refs.nn.functional", + "_refs.fft", + "_refs._conversions", + ] + module_alls = [ + (path, import_module(f"torch.{path}").__all__) for path in import_paths + ] + ref_ops_names = tuple( + itertools.chain.from_iterable( + [f"{path}.{op}" for op in module_all] for path, module_all in module_alls + ) + ) + ref_db_names = {ref_op.name for ref_op in python_ref_db} + + # TODO: References that do not have an entry in python_ref_db + skip_ref_ops = { + "_refs.alias", + "_refs.bitwise_right_shift", + "_refs.copy_to", + "_refs.empty_permuted", + "_refs.empty_strided", + "_refs.equal", + "_refs.full", + "_refs.full_like", + "_refs.is_complex", + "_refs.to", + "_refs.mvlgamma", + "_refs.ones", + "_refs.ones_like", + "_refs.special.expit", + "_refs.std_var", + "_refs.swap_axes", + "_refs.uniform", + "_refs.scalar_tensor", + "_refs.trunc_divide", + "_refs.zero", + "_refs.zeros", + "_refs.zeros_like", + "_refs.rfloordiv", + "_refs.rtruediv", + "_refs.rpow", + # These should be tested with their out-of-place counterparts + "_refs.index_add_", + "_refs.index_copy_", + "_refs.index_fill_", + "_refs.native_group_norm", + } + + not_in_decomp_table = { + # duplicated in _decomp and _refs + "_refs.nn.functional.group_norm", + "_refs.nn.functional.mse_loss", + "_refs.floor_divide", + # duplicated as refs do not have decent support for advanced indexing + "_refs.index_copy", 
+ "_refs.index_copy_", + "_refs.index_add", + "_refs.index_add_", + # these are not aten ops? + "_refs._conversions.bfloat16", + "_refs._conversions.bool", + "_refs._conversions.byte", + "_refs._conversions.char", + "_refs._conversions.double", + "_refs._conversions.float", + "_refs._conversions.half", + "_refs._conversions.int", + "_refs._conversions.long", + "_refs._conversions.short", + "_refs._conversions.chalf", + "_refs._conversions.cfloat", + "_refs._conversions.cdouble", + "_refs.broadcast_shapes", + "_refs.broadcast_tensors", + "_refs.mvlgamma", + "_refs.nn.functional.layer_norm", + "_refs.nn.functional.tanhshrink", + "_refs.nn.functional.triplet_margin_loss", + "_refs.rfloordiv", + "_refs.rtruediv", + "_refs.rpow", + # CompositeImplicitAutograd + "_refs.allclose", + "_refs.atleast_1d", + "_refs.atleast_2d", + "_refs.atleast_3d", + "_refs.broadcast_to", + "_refs.chunk", + "_refs.column_stack", + "_refs.contiguous", + "_refs.dsplit", + "_refs.dstack", + "_refs.fill", + "_refs.fill_", + "_refs.flatten", + "_refs.fliplr", + "_refs.flipud", + "_refs.float_power", + "_refs.hsplit", + "_refs.hstack", + "_refs.isclose", + "_refs.isfinite", + "_refs.isreal", + "_refs.istft", + "_refs.log_softmax", + "_refs.movedim", + "_refs.narrow", + "_refs.nn.functional.dropout", + "_refs.nn.functional.l1_loss", + "_refs.nn.functional.smooth_l1_loss", + "_refs.nn.functional.log_softmax", + "_refs.nn.functional.poisson_nll_loss", + "_refs.nn.functional.softmax", + "_refs.nn.functional.softmin", + "_refs.positive", + "_refs.ravel", + "_refs.reshape", + "_refs.softmax", + "_refs.special.expit", + "_refs.special.log_softmax", + "_refs.special.softmax", + "_refs.square", + "_refs.stft", + "_refs.T", + "_refs.take_along_dim", + "_refs.tensor_split", + "_refs.to", + "_refs.true_divide", + "_refs.trunc_divide", + "_refs.vsplit", + "_refs.vstack", + "_refs.linalg.matrix_norm", + "_refs.linalg.norm", + "_refs.linalg.svd", + "_refs.linalg.svdvals", + "_refs.unflatten", + "_refs.sum_to_size", + # ref implementation missing kwargs + "_refs.full_like", # missing "layout" + "_refs.scalar_tensor", # missing "layout" + # other + "_refs.block_diag", # only refs._block_diag_iterable is in decomposition table + "_refs.empty", # intentional; direct empty is faster and has less guards + "_refs.empty_permuted", # intentional; direct empty is faster and has less guards + "_refs.expand_as", + "_refs.as_strided", # _prims._as_strided_meta: "reduce() of empty sequence with no initial value" + "_refs.copy_to", # torch._C._jit_get_operation: No such operator aten::copy_to + "_refs.equal", # 'bool' object has no attribute 'dtype' + "_refs.conj", # Calls _prims.conj + "_refs.real", + "_refs.imag", + "_refs.reshape_as", + "_refs.view_as", + "_refs.view_as_complex", # TorchInductor does not support complex at the moment. 
+ # the decompositions for these ops are slightly different + # because of out handling + "_refs.var_mean", + "_refs.std_mean", + "_refs.native_layer_norm", + } + + @parametrize("op", ref_ops_names) + def test_refs_are_in_python_ref_db(self, op): + inplace = op[-1] == "_" + if op in self.skip_ref_ops: + raise unittest.SkipTest(f"{op} does not have an entry in python_ref_db") + elif inplace: + self.assertNotIn( + op, + self.ref_db_names, + msg=f"{op} is an in-place operation and should not have an OpInfo", + ) + else: + # Intentionally don't use assertIn to avoid printing the + # (very large) container + self.assertTrue(op in self.ref_db_names, msg=f"{op} not in ref_db_names") + + @parametrize("op", ref_ops_names) + def test_refs_are_in_decomp_table(self, op): + path = op.split(".") + module_path = ".".join(path[:-1]) + op_name = path[-1] + op_impl = getattr(import_module(f"torch.{module_path}"), op_name) + + if op in self.not_in_decomp_table: + self.assertNotIn( + op_impl, + torch._decomp.decomposition_table.values(), + f"Unexpectedly found {op} in torch._decomp.decomposition_table.values()", + ) + else: + self.assertIn( + op_impl, + torch._decomp.decomposition_table.values(), + f"Did not find {op} in torch._decomp.decomposition_table.values()", + ) + + +fake_skips = ( + "aminmax", # failing input + "cov", # aweights cannot be negtaive + "istft", # window overlap add min: 0 + "linalg.eigvals", # The tensor has a non-zero number of elements, but its data is not allocated yet + "linalg.eigvalsh", # aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend + "linalg.matrix_power", # Could not run 'aten::eye.m_out' with arguments from the 'Meta' backend + # "linalg.pinv", # Could not run 'aten::pinv.out' with arguments from the 'Meta' backen + "linalg.matrix_rank.hermitian", # Could not run 'aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend + "linalg.pinv.hermitian", # tensor.mH is only supported on matrices or batches of matrices. Got 1-D tensor + "linalg.solve", # Could not run 'aten::linalg_solve' with arguments from the 'Meta' backend + "linalg.tensorsolve", # Could not run 'aten::linalg_solve' with arguments from the 'Meta' + "lu_solve", # MALLOC ERROR: debug + "multinomial", # Could not run 'aten::multinomial' with arguments from the 'Meta' backend + "mvlgamma.mvlgamma_p_1", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend + "mvlgamma.mvlgamma_p_3", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend + "mvlgamma.mvlgamma_p_5", # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend + "nanmean", # logical_not() got an unexpected keyword argument 'out' + "quantile", # quantile() q values must be in the range [0, 1] + "nanquantile", # quantile() q values must be in the range [0, 1] + "nn.functional.ctc_loss", # The tensor has a non-zero number of elements, but its data is not allocated yet + "nn.functional.embedding_bag", # sometimes errors + "nn.functional.nll_loss", # sometimes errors + "nn.functional.max_pool1d", # The tensor has a non-zero number of elements + "to_sparse", # Could not run 'aten::_to_sparse' with arguments from the 'Meta' backend + "tensor_split", # The tensor has a non-zero number of elements, but its data is not allocated yet + "repeat_interleave", # cannot repeat_interleave a meta tensor without output_size + "sparse.sampled.addmm", # sparsity not supported + # Can not infer total number of classes from meta. 
no way at present to throw DynamicOutputShapeException + "nn.functional.one_hot", + "narrow", # Fails only for one overload with DataDependentOutputException (hence skip). +) + +fake_autocast_device_skips = defaultdict(dict) + +# TODO: investigate/fix +fake_autocast_device_skips["cpu"] = {"linalg.pinv"} + + +dynamic_output_op_tests = ( + "argwhere", + "bincount", + "combinations", + "linalg.lstsq", + "masked_select", + "nonzero", + "unique_consecutive", + "unique", + "linalg.lstsq.grad_oriented", +) + +# Ops that have dynamic output shapes that we can handle when +# allow_dynamic_shape_ops is True in fake tensor shape environment. +supported_dynamic_output_op_tests = ( + "nonzero", + "unique", + "repeat_interleave", + "masked_select", +) + +# some inputs invoke dynamic output shape operators, some do not +sometimes_dynamic_output_op_test = ( + "__getitem__", + "index_select", +) + +data_dependent_op_tests = ( + "equal", + "corrcoef", + "nn.functional.gaussian_nll_loss", + "allclose", +) + +aliasing_failures = ("histogramdd",) + +fake_backward_skips = { + "linalg.cond", + "linalg.matrix_norm", + "linalg.norm", + "linalg.svd", + "linalg.svdvals", + "pca_lowrank", + "roll", + "svd_lowrank", + "sgn", +} + +fake_backward_xfails = {skip(s) for s in fake_backward_skips} | { + xfail("fft.ihfftn"), # Mismatch in aten._conj_physical.default + xfail("fft.ihfft2"), # Mismatch in aten._conj_physical.default + skip("nn.functional.ctc_loss"), +} + +fake_autocast_backward_xfails = { + skip("nn.functional.binary_cross_entropy"), + skip("sparse.sampled_addmm"), + skip("linalg.pinv"), + skip("linalg.pinv", "hermitian"), + skip("linalg.pinv", "singular"), + skip("pinverse"), +} + + +@unMarkDynamoStrictTest +class TestFakeTensor(TestCase): + def setUp(self): + # Turn on FakeTensor caching and cross-checking for these tests: + cache_enabled = unittest.mock.patch( + "torch._dynamo.config.fake_tensor_cache_enabled", True + ) + cache_enabled.start() + self.addCleanup(cache_enabled.stop) + + cache_crosscheck = unittest.mock.patch( + "torch._dynamo.config.fake_tensor_cache_crosscheck_enabled", True + ) + cache_crosscheck.start() + self.addCleanup(cache_crosscheck.stop) + + def _test_fake_helper(self, device, dtype, op, context): + name = op.name + if op.variant_test_name: + name += "." 
+ op.variant_test_name + if name in fake_skips or "sparse" in name or "jiterator" in name: + self.skipTest("Skip failing test") + + samples = op.sample_inputs(device, dtype, requires_grad=False) + for sample in samples: + mode = FakeTensorMode() + + from torch.fx.experimental.symbolic_shapes import ShapeEnv + + allow_dynamic_output_shape_shape_env = ShapeEnv( + allow_dynamic_output_shape_ops=True + ) + + allow_dynamic_output_shape_mode = FakeTensorMode( + shape_env=allow_dynamic_output_shape_shape_env + ) + + try: + with context(): + res = op(sample.input, *sample.args, **sample.kwargs) + except Exception: + continue + + def run_with_fake_mode_and_verify(fake_mode, match_results=True): + def map_to_fake(e): + if isinstance(e, torch.Tensor): + return fake_mode.from_tensor(e) + else: + return e + + input = tree_map(map_to_fake, sample.input) + args = tree_map(map_to_fake, sample.args) + kwargs = tree_map(map_to_fake, sample.kwargs) + + try: + with context(): + with fake_mode: + res_fake = op(input, *args, **kwargs) + + if not match_results: + return + + for fake_out, real_out in zip( + pytree.tree_leaves(res_fake), pytree.tree_leaves(res) + ): + if not isinstance(fake_out, torch.Tensor): + self.assertTrue(not isinstance(real_out, torch.Tensor)) + self.assertEqual(fake_out, real_out) + continue + + self.assertTrue(isinstance(fake_out, FakeTensor)) + # if you see a shape exception here, you may need to add + # a `dynamic_output_shape` tag to an operator + + # prims/decomps must correctly model strides, + # see https://github.com/pytorch/pytorch/issues/78050#issuecomment-1253950325 + prims.utils.compare_tensor_meta(fake_out, real_out, True) + + if name not in aliasing_failures: + fake_aliasing = outputs_alias_inputs( + (input, args, kwargs), res_fake + ) + real_aliasing = outputs_alias_inputs( + (sample.input, sample, args, sample.kwargs), res + ) + self.assertEqual(fake_aliasing, real_aliasing) + + self.assertTrue( + name not in dynamic_output_op_tests + and name not in data_dependent_op_tests + ) + + except torch._subclasses.fake_tensor.UnsupportedFakeTensorException: + pass + except torch._subclasses.fake_tensor.UnsupportedOperatorException: + pass + except torch._subclasses.fake_tensor.DynamicOutputShapeException: + self.assertTrue( + name in dynamic_output_op_tests + or name in sometimes_dynamic_output_op_test + ) + self.assertTrue( + mode.shape_env is None + or not mode.shape_env.allow_dynamic_output_shape_ops + or name not in supported_dynamic_output_op_tests + ) + except torch._subclasses.fake_tensor.DataDependentOutputException: + self.assertTrue(name in data_dependent_op_tests) + + run_with_fake_mode_and_verify(mode) + if name in supported_dynamic_output_op_tests: + run_with_fake_mode_and_verify( + allow_dynamic_output_shape_mode, match_results=False + ) + + @ops(op_db, dtypes=OpDTypes.any_one) + def test_pointwise_ops(self, device, dtype, op): + name = op.name + if op.variant_test_name: + name += "." 
+ op.variant_test_name + if name in fake_skips or "sparse" in name or "jiterator" in name: + self.skipTest("Skip failing test") + + test_self = self + + class TestPointwiseMode(TorchDispatchMode): + def __torch_dispatch__(self, func, types, args=(), kwargs=None): + kwargs = kwargs or {} + + out = func(*args, **kwargs) + + if torch.Tag.pointwise in func.tags: + shapes = [] + for inp in pytree.arg_tree_leaves(*args, **kwargs): + if isinstance(inp, torch.Tensor): + shapes.append(inp.shape) + + out_shape = torch._refs._broadcast_shapes(*shapes) + + for out_elem in pytree.tree_leaves(out): + if isinstance(out_elem, torch.Tensor): + test_self.assertEqual(out_elem.shape, out_shape) + + return out + + samples = op.sample_inputs(device, dtype, requires_grad=False) + for sample in samples: + mode = FakeTensorMode() + + def map_to_fake(e): + if isinstance(e, torch.Tensor): + return mode.from_tensor(e) + else: + return e + + input = tree_map(map_to_fake, sample.input) + args = tree_map(map_to_fake, sample.args) + kwargs = tree_map(map_to_fake, sample.kwargs) + + try: + op(input, *args, **kwargs) + except Exception as e: + continue + + with TestPointwiseMode(): + with mode: + op(input, *args, **kwargs) + + @ops(op_db, dtypes=OpDTypes.any_one) + def test_fake(self, device, dtype, op): + self._test_fake_helper(device, dtype, op, contextlib.nullcontext) + + @ops(op_db, dtypes=OpDTypes.any_one) + def test_fake_autocast(self, device, dtype, op): + if op.name in fake_autocast_device_skips[device]: + self.skipTest("Skip failing test") + context = ( + torch.cuda.amp.autocast if device == "cuda" else torch.cpu.amp.autocast + ) + self._test_fake_helper(device, dtype, op, context) + + def _test_fake_crossref_helper(self, device, dtype, op, context): + samples = op.sample_inputs(device, dtype, requires_grad=True) + + for iter, sample in enumerate(samples): + args = [sample.input] + list(sample.args) + kwargs = sample.kwargs + + # skip these to speed up tests + common_skip_ops = ( + aten.detach.default, + aten.empty_strided.default, + aten.copy_.default, + aten.is_same_size.default, + ) + + # TODO: enable check_aliasing, batch norm fails + try: + with torch._subclasses.CrossRefFakeMode( + ignore_op_fn=lambda fn: fn in common_skip_ops, check_aliasing=True + ): + with warnings.catch_warnings(), context(), torch.autograd.set_multithreading_enabled( + False + ): + composite_compliance.compute_expected_grads( + op.get_op(), + args, + kwargs, + sample.output_process_fn_grad, + op.gradcheck_wrapper, + ) + except torch._subclasses.fake_tensor.UnsupportedOperatorException: + pass + + @onlyCUDA + @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,)) + @skipOps( + "TestFakeTensor", "test_fake_crossref_backward_no_amp", fake_backward_xfails + ) + def test_fake_crossref_backward_no_amp(self, device, dtype, op): + self._test_fake_crossref_helper(device, dtype, op, contextlib.nullcontext) + + @onlyCUDA + @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,)) + @skipOps( + "TestFakeTensor", + "test_fake_crossref_backward_amp", + fake_backward_xfails | fake_autocast_backward_xfails, + ) + def test_fake_crossref_backward_amp(self, device, dtype, op): + self._test_fake_crossref_helper(device, dtype, op, torch.cuda.amp.autocast) + + @ops([op for op in ops_and_refs if op.is_factory_function]) + def test_strided_layout(self, device, dtype, op): + samples = op.sample_inputs(device, dtype) + for sample in samples: + kwargs = sample.kwargs.copy() + kwargs["layout"] = torch.strided + 
strided_result = op(sample.input, *sample.args, **kwargs) + self.assertEqual(strided_result.layout, torch.strided) #instantiate_device_type_tests(TestCommon, globals(), only_for="xpu") @@ -2693,6 +2692,4 @@ def check_cow_input( if __name__ == "__main__": TestCase._default_dtype_check_enabled = True - #import pdb - #pdb.set_trace() run_tests() diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 8d93fcc590b2c..322ead8db2f3c 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -10588,7 +10588,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake_autocast'), # Booleans mismatch: AssertionError: False is not true DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake'), - #DecorateInfo(unittest.skip, 'TestCommon', 'test_compare_cpu', device_type="xpu", dtypes=None), ) ), OpInfo('arange', diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index 6ae58240ea75a..a949574326323 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -1030,9 +1030,9 @@ def __post_init__(self): else: #self.skips = (DecorateInfo(unittest.skip, 'TestCommon', 'test_compare_cpu', device_type="xpu", dtypes=skip_dtypes)) self.skips = (DecorateInfo(unittest.skip, device_type="xpu", dtypes=None)) - print("#### skipXPU on {} {} {}".format(self.name, skip_dtypes, self.skips)) + #print("Skip XPU backend on {} with {} and {}".format(self.name, skip_dtypes, self.skips)) else: - print("#### Don't skipXPU on {}".format(self.name)) + print("Won't skip XPU backend on op {}".format(self.name)) self.decorators = (*self.decorators, *self.skips) @@ -1375,8 +1375,7 @@ def sample_inputs_sparse_bsc(self, device, dtype, requires_grad=False, **kwargs) def get_decorators(self, test_class, test_name, device, dtype, param_kwargs): """Returns the decorators targeting the given test.""" result = [] - #import pdb - #pdb.set_trace() + for decorator in self.decorators: if isinstance(decorator, DecorateInfo): if decorator.is_active( From fa6c8ae1da4bd242d1319ba431abe474a593da03 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Tue, 21 May 2024 20:35:49 -0700 Subject: [PATCH 04/37] refine the xpu ops switch method --- test/test_ops.py | 25 ++-- torch/testing/_internal/common_device_type.py | 11 +- .../_internal/common_methods_invocations.py | 128 +++++++++--------- torch/testing/_internal/opinfo/core.py | 37 ++--- 4 files changed, 92 insertions(+), 109 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 5a8d9bc461f8d..70c2eb41eb26e 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -108,10 +108,6 @@ ) ) -if TEST_XPU: - any_common_cpu_device_one = OpDTypes.any_common_cpu_xpu_one -else: - any_common_cpu_device_one = OpDTypes.any_common_cpu_cuda_one @@ -136,11 +132,13 @@ def reduction_dtype_filter(op): aten = torch.ops.aten -# _xpu_computation_op_list = ["_refs.abs", "_refs.all", "item", "abs", "add", "_refs.fill"] -# _xpu_computation_op_list = ["abs"] -# _xpu_computation_ops = [ -# op for op in ops_and_refs if op.name in _xpu_computation_op_list -# ] +def any_common_cpu_device_one(): + # import pdb + # pdb.set_trace() + return OpDTypes.any_common_cpu_xpu_one if TEST_XPU else OpDTypes.any_common_cpu_cuda_one + +def has_gpu_device(devices: List[str]): + return "cuda" in devices or "xpu" in devices # Tests that apply 
to all operators and aren't related to any particular # system @@ -286,7 +284,7 @@ def test_numpy_ref(self, device, dtype, op): and op.formatted_name in ("signal_windows_exponential", "signal_windows_bartlett") and dtype == torch.float64 - and ("cuda" in device or "xpu" in device) + and has_gpu_device(device) ): # noqa: E121 raise unittest.SkipTest("XXX: raises tensor-likes are not close.") @@ -301,8 +299,7 @@ def test_numpy_ref(self, device, dtype, op): @onlyCUDAAndXPU @suppress_warnings @slowTest - @ops(_ops_and_refs_with_no_numpy_ref, dtypes=any_common_cpu_device_one) - #@ops(_xpu_computation_ops, dtypes=any_common_cpu_device_one) + @ops(_ops_and_refs_with_no_numpy_ref, dtypes=any_common_cpu_device_one()) def test_compare_cpu(self, device, dtype, op): def to_cpu(arg): if isinstance(arg, torch.Tensor): @@ -2683,8 +2680,8 @@ def test_strided_layout(self, device, dtype, op): self.assertEqual(strided_result.layout, torch.strided) -#instantiate_device_type_tests(TestCommon, globals(), only_for="xpu") -instantiate_device_type_tests(TestCompositeCompliance, globals(), only_for="xpu") +instantiate_device_type_tests(TestCommon, globals(), only_for="xpu") +#instantiate_device_type_tests(TestCompositeCompliance, globals(), only_for="xpu") #instantiate_device_type_tests(TestMathBits, globals()) #instantiate_device_type_tests(TestRefsOpsInfo, globals(), only_for="cpu") #instantiate_device_type_tests(TestFakeTensor, globals()) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index 17bd8357f15b0..86940f6c95cdc 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -397,19 +397,13 @@ def instantiate_test_helper(cls, name, *, test, param_kwargs=None, decorator_fn= # Add the device param kwarg if the test needs device or devices. param_kwargs = {} if param_kwargs is None else param_kwargs test_sig_params = inspect.signature(test).parameters - #import pdb - #pdb.set_trace() if 'device' in test_sig_params or 'devices' in test_sig_params: device_arg: str = cls._init_and_get_primary_device() if hasattr(test, 'num_required_devices'): device_arg = cls.get_all_devices() _update_param_kwargs(param_kwargs, 'device', device_arg) - #import pdb - #pdb.set_trace() # Apply decorators based on param kwargs. for decorator in decorator_fn(param_kwargs): - #import pdb - #pdb.set_trace() test = decorator(test) # Constructs the test @@ -845,6 +839,7 @@ class OpDTypes(Enum): any_common_cpu_xpu_one = 7 # Test precisely one supported dtype that is common to both xpu and cpu + # Arbitrary order ANY_DTYPE_ORDER = ( torch.float32, @@ -920,8 +915,6 @@ def _parametrize_test(self, test, generic_cls, device_cls): 'instantiate_parametrized_tests()') op = check_exhausted_iterator = object() - #import pdb - #pdb.set_trace() for op in self.op_list: # Determine the set of dtypes to use. dtypes: Union[Set[torch.dtype], Set[None]] @@ -1615,3 +1608,5 @@ def skipPRIVATEUSE1(fn): # This should probably enumerate all available device type test base classes. 
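
The OpInfo entries in the hunks below opt back into XPU testing through enable_skipped_device=('xpu',), while the opinfo/core.py change further down skips that device type by default in __post_init__. A rough sketch of how such a skip-by-default switch can work; MiniOpInfo and FakeDecorateInfo are hypothetical stand-ins used only for illustration:

    import unittest
    from dataclasses import dataclass
    from typing import Optional, Tuple

    @dataclass
    class FakeDecorateInfo:          # hypothetical stand-in for DecorateInfo
        decorator: object
        device_type: Optional[str] = None

    @dataclass
    class MiniOpInfo:                # hypothetical stand-in for OpInfo
        name: str
        skip_device: Tuple[str, ...] = ("xpu",)       # skipped by default
        enable_skipped_device: Tuple[str, ...] = ()   # ops opt back in here
        skips: Tuple = ()

        def __post_init__(self):
            # Every device in skip_device that the op did not re-enable gets an
            # unconditional skip decorator appended to its skip list.
            for device in set(self.skip_device) - set(self.enable_skipped_device):
                self.skips = (*self.skips,
                              FakeDecorateInfo(unittest.skip("disabled device"),
                                               device_type=device))

    print(MiniOpInfo("abs", enable_skipped_device=("xpu",)).skips)  # () -> runs on xpu
    print(MiniOpInfo("cauchy").skips)  # contains an xpu skip -> stays skipped
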
def get_all_device_types() -> List[str]: return ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda'] + + diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 322ead8db2f3c..872fca9514c8f 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -10422,7 +10422,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), dtypesIfXPU=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), - skipXPU=False, skips=( DecorateInfo(unittest.skip("In-place abs not supported for complex tensors"), 'TestBwdGradients', 'test_inplace_grad', dtypes=(torch.cdouble,)), @@ -10456,7 +10455,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_sparse_csc=True, supports_sparse_bsr=True, supports_sparse_bsc=True, - supports_forward_ad=True), + supports_forward_ad=True, + enable_skipped_device=('xpu',)), # NOTE: CPU complex acos produces incorrect outputs (https://github.com/pytorch/pytorch/issues/42952) UnaryUfuncInfo('acos', aliases=('arccos', ), @@ -10545,7 +10545,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, supports_forward_ad=True, supports_two_python_scalars=True, - skipXPU=False, decorators=( DecorateInfo( toleranceOverride({torch.chalf: tol(atol=1e-2, rtol=0)}), @@ -10565,7 +10564,9 @@ def reference_flatten(input, start_dim=0, end_dim=-1): 'TestBinaryUfuncs', 'test_reference_numerics_extremal_values', dtypes=(torch.complex64, torch.complex128)), - )), + ), + enable_skipped_device=('xpu',), + ), OpInfo('item', op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.item, inp, *args, **kwargs), ref=np.ndarray.item, @@ -10575,7 +10576,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_autograd=False, error_inputs_func=error_inputs_item, sample_inputs_func=sample_inputs_item, - skipXPU=False, skips=( # Error testing item function variant DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', @@ -10588,7 +10588,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake_autocast'), # Booleans mismatch: AssertionError: False is not true DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake'), - ) + ), + enable_skipped_device=('xpu',), ), OpInfo('arange', dtypes=all_types_and(torch.bfloat16, torch.float16), @@ -10597,7 +10598,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): is_factory_function=True, error_inputs_func=error_inputs_arange, sample_inputs_func=sample_inputs_arange, - skipXPU=False, skips=( # https://github.com/pytorch/pytorch/issues/81774 DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'), @@ -10626,7 +10626,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # UserWarning not triggered : Resized a non-empty tensor but did not warn about it. 
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'), - )), + ), + enable_skipped_device=('xpu',)), OpInfo('cauchy', op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.cauchy_, inp, *args, **kwargs), inplace_variant=torch.Tensor.cauchy_, @@ -10806,7 +10807,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_rhs_python_scalar=False, supports_fwgrad_bwgrad=True, rhs_make_tensor_kwargs=dict(exclude_zero=False), - skipXPU=False, skips=( # RuntimeError: "max_elementwise_cuda" not implemented for 'ComplexFloat' DecorateInfo(unittest.expectedFailure, @@ -10817,7 +10817,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestLazyOpInfo', 'test_dispatched_to_lazy'), # test error disabled since rhs non-tensor python scalar is supported DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_errors'), - )), + ), + enable_skipped_device=('xpu',),), BinaryUfuncInfo('clamp_min', ref=_clamp_min_numpy, dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16), @@ -10825,7 +10826,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_rhs_python_scalar=False, supports_fwgrad_bwgrad=True, rhs_make_tensor_kwargs=dict(exclude_zero=False), - skipXPU=False, skips=( # RuntimeError: "min_elementwise_cuda" not implemented for 'ComplexFloat' DecorateInfo(unittest.expectedFailure, @@ -10836,7 +10836,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestLazyOpInfo', 'test_dispatched_to_lazy'), # test error disabled since rhs non-tensor python scalar is supported DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_errors'), - )), + ), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('mul', aliases=('multiply',), dtypes=all_types_and_complex_and(torch.chalf, torch.float16, torch.bfloat16, torch.bool), @@ -10850,7 +10851,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): sample_inputs_sparse_csc_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_csc), sample_inputs_sparse_bsr_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_bsr), sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_bsc), - skipXPU=False), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('sub', # NumPy has no builtin reference for the alpha kwarg, but it is easy enough to emulate @@ -10878,7 +10879,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): toleranceOverride({torch.chalf: tol(atol=5e-3, rtol=0)}), 'TestDecomp', 'test_quick', device_type='cpu'), ), - skipXPU=False, skips=( DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', @@ -10888,7 +10888,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): 'TestBinaryUfuncs', 'test_reference_numerics_small_values', dtypes=(torch.uint8,)), - )), + ), + enable_skipped_device=('xpu',)), OpInfo('addmm', # This addmm OpInfo is for when alpha and beta are not both equal to 1. 
# alpha=beta=1 is tested in the following opinfo, because that special case will @@ -11331,7 +11332,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=integral_types_and(torch.bool), operator_variant=operator.invert, supports_autograd=False, - skipXPU=False), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('bitwise_left_shift', op=torch.bitwise_left_shift, dtypes=integral_types(), @@ -11461,13 +11462,13 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_forward_ad=True, supports_fwgrad_bwgrad=True, supports_out=False, - skipXPU=False, skips=( # TypeError: _copy_dispatcher() got an unexpected keyword argument 'memory_format' # (NumPy reference needs to be extended with memory_format) DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref'), DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref_mps'), - ),), + ), + enable_skipped_device=('xpu',)), OpInfo('contiguous', op=lambda x, *args, **kwargs: x.contiguous(*args, **kwargs), dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf), @@ -11503,14 +11504,14 @@ def reference_flatten(input, start_dim=0, end_dim=-1): assert_autodiffed=True, supports_forward_ad=True, supports_fwgrad_bwgrad=True, - skipXPU=False, skips=( # NNC appear to not handle boolean clamp DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness', dtypes=(torch.bool,)), - )), + ), + enable_skipped_device=('xpu',)), UnaryUfuncInfo('positive', ref=np.positive, dtypes=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf), @@ -11574,7 +11575,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_view_as_real, test_conjugated_samples=False, - skipXPU=False, + enable_skipped_device=('xpu',), ), OpInfo('view_as_complex', dtypes=floating_types_and(torch.half), @@ -11583,7 +11584,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, test_neg_view=False, sample_inputs_func=sample_inputs_view_as_complex, - skipXUP=False, skips=( # RuntimeError: Tensor must have a last dimension with stride 1 DecorateInfo(unittest.expectedFailure, "TestCommon", "test_noncontiguous_samples"), @@ -11591,7 +11591,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness', dtypes=(torch.half,)), # RuntimeError: view size is not compatible with input tensor's size and stride DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides"), - )), + ), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('complex', dtypes=floating_types_and(torch.half), supports_forward_ad=True, @@ -11637,7 +11638,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, promotes_int_to_float=True, decorators=(precisionOverride({torch.bfloat16: 1e-2}),), - skipXPU=False, skips=( DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large', dtypes=(torch.cfloat, torch.cdouble,), device_type='cpu', active_if=IS_WINDOWS), @@ -11655,7 +11655,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestUnaryUfuncs', 'test_reference_numerics_large', device_type='cuda', dtypes=(torch.chalf,), active_if=IS_WINDOWS), - )), + ), + enable_skipped_device=('xpu',)), UnaryUfuncInfo('cosh', ref=np_unary_ufunc_integer_promotion_wrapper(np.cosh), dtypes=all_types_and_complex_and(torch.bool, 
torch.half, torch.bfloat16), @@ -11726,12 +11727,12 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.half, torch.bfloat16), supports_forward_ad=True, supports_fwgrad_bwgrad=True, - skipXPU=False, skips=( # cumsum does not handle correctly out= dtypes DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'), ), - sample_inputs_func=sample_inputs_cumulative_ops), + sample_inputs_func=sample_inputs_cumulative_ops, + enable_skipped_device=('xpu',)), OpInfo('cumprod', dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16), supports_forward_ad=True, @@ -11804,7 +11805,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_two_python_scalars=True, assert_autodiffed=True, rhs_make_tensor_kwargs=dict(exclude_zero=True), - skipXPU=False,), + enable_skipped_device=('xpu',),), BinaryUfuncInfo('div', aliases=('divide',), @@ -11822,11 +11823,11 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # See https://github.com/pytorch/pytorch/issues/111126 DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'), ), - skipXPU=False, skips=( # RuntimeError: MALFORMED INPUT: Unhandled node kind (in computeValue): aten::div DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_working'), - )), + ), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('div', aliases=('divide',), variant_test_name='floor_rounding', @@ -11843,11 +11844,11 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # See https://github.com/pytorch/pytorch/issues/111126 DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'), ), - skipXPU=False, skips=( # RuntimeError: MALFORMED INPUT: Unhandled node kind (in computeValue): aten::div DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_working'), - )), + ), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('true_divide', dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), @@ -11947,9 +11948,9 @@ def reference_flatten(input, start_dim=0, end_dim=-1): always_returns_bool=True, supports_autograd=False, sample_inputs_func=sample_inputs_comparison_ops, - skipXPU=False, skips=( - )), + ), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('fmax', op=torch.fmax, dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool), @@ -11981,7 +11982,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, assert_autodiffed=None, rhs_make_tensor_kwargs={'exclude_zero': True}, - skipXPU=False, decorators=( DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_contig_vs_every_other', @@ -11995,7 +11995,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_reference_numerics_small_values', dtypes=(torch.uint8,)), - )), + ), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('remainder', ref=np.remainder, dtypes=all_types_and(torch.float16, torch.bfloat16), @@ -13028,12 +13029,12 @@ def reference_flatten(input, start_dim=0, end_dim=-1): inplace_operator_variant=operator.iand, supports_autograd=False, supports_one_python_scalar=True, - skipXPU=False, skips=( # RuntimeError: "bitwise_and_cuda" not implemented for 'Half' DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion', device_type='cuda'), - )), + ), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('bitwise_or', ref=np.bitwise_or, 
dtypes=integral_types_and(torch.bool), @@ -13041,14 +13042,14 @@ def reference_flatten(input, start_dim=0, end_dim=-1): inplace_operator_variant=operator.ior, supports_autograd=False, supports_one_python_scalar=True, - skipXPU=False, skips=( # TODO: FIXME: RuntimeError: "bitwise_or_cuda" not implemented for 'Half' DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion', device_type='cuda'), - )), + ), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('bitwise_xor', ref=np.bitwise_xor, dtypes=integral_types_and(torch.bool), @@ -13056,14 +13057,14 @@ def reference_flatten(input, start_dim=0, end_dim=-1): inplace_operator_variant=operator.ixor, supports_autograd=False, supports_one_python_scalar=True, - skipXPU=False, skips=( # TODO: FIXME: RuntimeError: "bitwise_xor_cuda" not implemented for 'Half' DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion', device_type='cuda'), - )), + ), + enable_skipped_device=('xpu',)), BinaryUfuncInfo('heaviside', ref=lambda a, b: ( # necessary because np.heaviside incorrectly returns float64 when passed args of dtype int64 @@ -13093,12 +13094,13 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=integral_types_and(), supports_autograd=False, supports_rhs_python_scalar=False, - skipXPU=False, skips=( DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_reference_numerics_small_values', - dtypes=(torch.int8,)),)), + dtypes=(torch.int8,)),), + enable_skipped_device=('xpu',) + ), BinaryUfuncInfo('isclose', ref=np.isclose, dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), @@ -15408,13 +15410,13 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_forward_ad=True, supports_fwgrad_bwgrad=True, autodiff_nonfusible_nodes=["aten::gelu"], - skipXPU=False, skips=( # AssertionError: Tensor-likes are not close! # May not replicate in CI DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out'), DecorateInfo(unittest.skip("Unsupported on MPS for now"), 'TestCommon', 'test_numpy_ref_mps'), - )), + ), + enable_skipped_device=('xpu',)), UnaryUfuncInfo('nn.functional.relu6', aten_name="relu6", dtypes=all_types_and(torch.half, torch.bfloat16), @@ -17256,9 +17258,9 @@ def reference_flatten(input, start_dim=0, end_dim=-1): sample_inputs_sparse_csc_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_csc), sample_inputs_sparse_bsr_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_bsr), sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_bsc), - skipXPU=False, skips=( - )), + ), + enable_skipped_device=('xpu',)), OpInfo('ones_like', dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), supports_out=False, @@ -17423,7 +17425,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), supports_out=True, sample_inputs_func=sample_inputs_ones_zeros, - skipXPU=False, skips=( # Tests that assume input is a tensor or sequence of tensors DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'), @@ -17436,7 +17437,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # UserWarning not triggered : Resized a non-empty tensor but did not warn about it. 
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'), - )), + ), + enable_skipped_device=('xpu',)), OpInfo('full', op=torch.full, supports_autograd=False, @@ -17566,7 +17568,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), sample_inputs_func=sample_inputs_empty, supports_autograd=False, - skipXPU=False, skips=( DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"), # Empty tensor data is garbage so it's hard to make comparisons with it. @@ -17602,7 +17603,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.skip("Expected: empty is not comparable"), 'TestCommon', 'test_complex_half_reference_testing'), DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'), - )), + ), + enable_skipped_device=('xpu',)), OpInfo('eye', dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), sample_inputs_func=sample_inputs_eye, @@ -17789,7 +17791,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_bernoulli, error_inputs_func=error_inputs_bernoulli, - skipXPU=False, skips=( # vmap: We do not yet support calling random operations inside of vmap DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_forward_mode_AD'), @@ -17801,7 +17802,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'), # UserWarning not triggered : Resized a non-empty tensor but did not warn about it. DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'), - DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'))), + DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu')), + enable_skipped_device=('xpu',)), OpInfo('scatter_add', dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), sample_inputs_func=sample_inputs_scatter_add, @@ -18048,10 +18050,10 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_forward_ad=True, supports_fwgrad_bwgrad=True, sample_inputs_func=sample_repeat_tile, - skipXPU=True, skips=( DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"), - )), + ), + enable_skipped_device=('xpu',)), OpInfo('squeeze', ref=_squeeze_ref, dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf), @@ -18094,14 +18096,14 @@ def reference_flatten(input, start_dim=0, end_dim=-1): check_batched_forward_grad=False, dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16), supports_out=False, - skipXPU=False, skips=( # JIT has issue when op is passed as lambda # AssertionError: JIT Test does not execute any logic DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'), DecorateInfo(unittest.skip("No fill_ op"), 'TestCudaFuserOpInfo'), DecorateInfo(unittest.skip("No fill_ op"), 'TestNNCOpInfo'), - )), + ), + enable_skipped_device=('xpu',)), OpInfo('resize_', op=lambda x, shape: x.clone().resize_(shape), method_variant=None, @@ -18112,14 +18114,14 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), supports_out=False, supports_autograd=False, - skipXPU=False, skips=( # Cannot resize variables that 
require grad DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_dtypes'), DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"), DecorateInfo(unittest.skip("Allowed exception"), 'TestCompositeCompliance', 'test_operator'), ), - sample_inputs_func=sample_inputs_resize_ops), + sample_inputs_func=sample_inputs_resize_ops, + enable_skipped_device=('xpu',)), OpInfo('resize_as_', op=lambda x, other: torch.resize_as_(x.clone(), other), method_variant=None, @@ -18127,13 +18129,13 @@ def reference_flatten(input, start_dim=0, end_dim=-1): dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), supports_out=False, supports_autograd=False, - skipXPU=False, skips=( # Cannot resize variables that require grad DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_dtypes'), DecorateInfo(unittest.skip('Allowed exemption'), 'TestCompositeCompliance', 'test_operator'), ), - sample_inputs_func=sample_inputs_resize_ops), + sample_inputs_func=sample_inputs_resize_ops, + enable_skipped_device=('xpu',)), OpInfo('take_along_dim', dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), @@ -19238,11 +19240,11 @@ def reference_flatten(input, start_dim=0, end_dim=-1): result_dtype=torch.bool, dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), ref=reference_reduction_numpy(np.all), - skipXPU=False, skips=( # FIXME: uint8 input returns uint8 instead of bool DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_result_dtype', dtypes=[torch.uint8]), ), + enable_skipped_device=('xpu',), ), ReductionOpInfo( 'any', diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index a949574326323..5d1ac019c8b4b 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -5,7 +5,8 @@ import math import operator import unittest -from dataclasses import asdict, dataclass +from dataclasses import InitVar, asdict, dataclass +from typing import Dict, Optional from enum import Enum from functools import partial from itertools import product @@ -25,7 +26,6 @@ floating_and_complex_types, floating_and_complex_types_and, floating_types, - empty_types, ) from torch.testing._internal.common_utils import ( is_iterable_of_tensors, @@ -683,8 +683,11 @@ class OpInfo: # information about which tests to skip skips: Tuple = tuple() - # skip xpu by default - skipXPU: bool = True + # skip the test for a device + skip_device: Tuple = tuple() + + # enable the test for a device + enable_skipped_device: Tuple = tuple() # decorators to apply to generated tests decorators: Tuple = tuple() @@ -1021,18 +1024,13 @@ def __post_init__(self): else: self.inplace_operator_variant = None - if self.skipXPU == True: - skip_dtypes= self.dtypesIfXPU - + # Skip XPU test by default + self.skip_device = ('xpu',) + for device in (set(self.skip_device).difference(set(self.enable_skipped_device))): if self.skips is not None: - #self.skips = (*self.skips, DecorateInfo(unittest.skip, 'TestCommon', 'test_compare_cpu', device_type="xpu", dtypes=skip_dtypes)) - self.skips = (*self.skips, DecorateInfo(unittest.skip, device_type="xpu", dtypes=None)) + self.skips = (*self.skips, DecorateInfo(unittest.skip, device_type=device, dtypes=None)) else: - #self.skips = (DecorateInfo(unittest.skip, 'TestCommon', 'test_compare_cpu', device_type="xpu", dtypes=skip_dtypes)) - self.skips = 
(DecorateInfo(unittest.skip, device_type="xpu", dtypes=None)) - #print("Skip XPU backend on {} with {} and {}".format(self.name, skip_dtypes, self.skips)) - else: - print("Won't skip XPU backend on op {}".format(self.name)) + self.skips = (DecorateInfo(unittest.skip, device_type=device, dtypes=None)) self.decorators = (*self.decorators, *self.skips) @@ -1567,7 +1565,6 @@ def __init__( yield tuple(), {}, ), - skipXPU: bool = True, # Options from the OpInfo base class **kwargs, ): @@ -1591,7 +1588,7 @@ def sample_inputs_func(*args, **kwargs): # Override OpInfo defaults and call base class __init__ kwargs.setdefault("inplace_variant", None) kwargs.setdefault("sample_inputs_func", sample_inputs_func) - super().__init__(name, promotes_int_to_float=promotes_int_to_float, skipXPU = skipXPU, **kwargs) + super().__init__(name, promotes_int_to_float=promotes_int_to_float, **kwargs) self.identity = identity self.nan_policy = nan_policy @@ -2156,7 +2153,6 @@ def __init__( supports_rhs_python_scalar=True, # Whether the operator allows Tensor x scalar inputs supports_one_python_scalar=False, # Whether the operator allows scalar x tensor and tensor x scalar inputs supports_two_python_scalars=False, # Whether the operator allows scalar x scalar inputs - skipXPU=True, **kwargs, ): self._original_binary_ufunc_args = locals().copy() @@ -2177,7 +2173,6 @@ def __init__( sample_inputs_func=sample_inputs_func, reference_inputs_func=reference_inputs_func, error_inputs_func=make_error_inputs_elementwise_binary(error_inputs_func), - skipXPU=skipXPU, **kwargs, ) @@ -2506,7 +2501,6 @@ def __init__( reference_inputs_func=reference_inputs_elementwise_unary, sample_kwargs=lambda device, dtype, input: ({}, {}), reference_numerics_filter=None, # Filters values in the range of the domain specified above but that should not be tested - skipXPU=True, **kwargs, ): self._original_unary_ufunc_args = locals().copy() @@ -2516,7 +2510,6 @@ def __init__( dtypes=dtypes, sample_inputs_func=sample_inputs_func, reference_inputs_func=reference_inputs_func, - skipXPU=skipXPU, **kwargs, ) @@ -2651,7 +2644,6 @@ def __init__( ndimensional: SpectralFuncType, sample_inputs_func=sample_inputs_spectral_ops, decorators=None, - skipXPU=True, **kwargs, ): self._original_spectral_func_args = dict(locals()).copy() @@ -2672,7 +2664,6 @@ def __init__( dtypes=dtypes, decorators=decorators, sample_inputs_func=sample_inputs_func, - skipXPU=skipXPU, **kwargs, ) self.ref = ref @@ -2691,7 +2682,6 @@ def __init__( dtypesIfCUDA=None, dtypesIfROCM=None, sample_inputs_func=None, - skipXPU=True, **kwargs, ): super().__init__( @@ -2700,7 +2690,6 @@ def __init__( dtypesIfCUDA=dtypesIfCUDA, dtypesIfROCM=dtypesIfROCM, sample_inputs_func=sample_inputs_func, - skipXPU=skipXPU, **kwargs, ) self.ref = ref From 22ad9f82a0a1979d64313021d2ee2ef9eec25aac Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Tue, 21 May 2024 20:38:37 -0700 Subject: [PATCH 05/37] remove unused comments --- test/test_ops.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 70c2eb41eb26e..a6be62fe3cef6 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -133,8 +133,6 @@ def reduction_dtype_filter(op): aten = torch.ops.aten def any_common_cpu_device_one(): - # import pdb - # pdb.set_trace() return OpDTypes.any_common_cpu_xpu_one if TEST_XPU else OpDTypes.any_common_cpu_cuda_one def has_gpu_device(devices: List[str]): From d87908360b0280f068787c6756dbb7190f008d50 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Sun, 26 May 2024 02:32:31 -0700 Subject: 
[PATCH 06/37] added xpu_op_db.yaml to enable xpu op UT. Refined onlyOn interface to support multiple devices.Support PYTORCH_TESTING_DEVICE_ONLY_FOR='xpu' to enable xpu test --- test/test_ops.py | 20 +- test/xpu/xpu_op_db.yaml | 87 ++++++++ torch/testing/_internal/common_device_type.py | 27 ++- .../_internal/common_methods_invocations.py | 185 ++++++++++-------- torch/testing/_internal/common_utils.py | 14 ++ torch/testing/_internal/opinfo/core.py | 22 ++- 6 files changed, 241 insertions(+), 114 deletions(-) create mode 100644 test/xpu/xpu_op_db.yaml diff --git a/test/test_ops.py b/test/test_ops.py index a6be62fe3cef6..ecefb53195788 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -810,7 +810,7 @@ def _extract_strides(out): # NOTE: only extracts on the CPU and CUDA device types since some # device types don't have storage def _extract_data_ptrs(out): - if self.device_type != "cpu" and self.device_type != "cuda": + if self.device_type != "cpu" and self.device_type != "cuda" and self.device_type != "xpu": return () if isinstance(out, torch.Tensor): @@ -938,7 +938,7 @@ def _extract_strides(out): # NOTE: only extracts on the CPU and CUDA device types since some # device types don't have storage def _extract_data_ptrs(out): - if self.device_type != "cpu" and self.device_type != "cuda": + if self.device_type != "cpu" and self.device_type != "cuda" and self.device_type != "xpu": return () if isinstance(out, torch.Tensor): @@ -1016,7 +1016,6 @@ def _case_two_transform(t): elif torch.cuda.is_available(): wrong_device = "cuda" elif torch.xpu.is_available(): - # Daisy ???? wrong_device = "xpu" factory_fn_msg = ( @@ -1674,7 +1673,8 @@ def test_forward_ad(self, device, dtype, op): composite_compliance.check_forward_ad_formula( op.get_op(), args, kwargs, op.gradcheck_wrapper, self.assertEqual ) - + + @skipXPU @ops(op_db, allowed_dtypes=(torch.float,)) def test_cow_input(self, device, dtype, op): samples = op.sample_inputs(device, dtype, requires_grad=op.supports_autograd) @@ -2678,12 +2678,12 @@ def test_strided_layout(self, device, dtype, op): self.assertEqual(strided_result.layout, torch.strided) -instantiate_device_type_tests(TestCommon, globals(), only_for="xpu") -#instantiate_device_type_tests(TestCompositeCompliance, globals(), only_for="xpu") -#instantiate_device_type_tests(TestMathBits, globals()) -#instantiate_device_type_tests(TestRefsOpsInfo, globals(), only_for="cpu") -#instantiate_device_type_tests(TestFakeTensor, globals()) -#instantiate_device_type_tests(TestTags, globals()) +instantiate_device_type_tests(TestCommon, globals()) +instantiate_device_type_tests(TestCompositeCompliance, globals()) +instantiate_device_type_tests(TestMathBits, globals()) +instantiate_device_type_tests(TestRefsOpsInfo, globals(), only_for="cpu") +instantiate_device_type_tests(TestFakeTensor, globals()) +instantiate_device_type_tests(TestTags, globals()) if __name__ == "__main__": TestCase._default_dtype_check_enabled = True diff --git a/test/xpu/xpu_op_db.yaml b/test/xpu/xpu_op_db.yaml new file mode 100644 index 0000000000000..c0833b0cd30ea --- /dev/null +++ b/test/xpu/xpu_op_db.yaml @@ -0,0 +1,87 @@ +backend: XPU +supported: + - fill + - zeros + - zeros_like + - clone + - view_as_real + - view_as_complex + - view + - resize_ + - resize_as_ + - add + - sub + - mul + - div + - abs + - bernoulli + - bitwise_and + - bitwise_not + - bitwise_or + - bitwise_xor + - clamp + - clamp_max + - clamp_min + - clone + - copy + - cos + - cumsum + - empty + - eq + - fill + - fmod + - gcd + - ge + - gelu + - gt + - 
index_add + - index_put + - index_select + - isnan + - le + - log + - lt + - masked_fill + - maximum + - minimum + - mul + - native_dropout_backward + - ne + - neg + - nn.functional.adaptive_avg_pool2d + - nn.functional.threshold + - nonzero + - normal + - pow + - reciprocal + - rsub + - relu + - remainder + - reshape + - rsqrt + - sin + - sqrt + - sum + - tanh + - unfold + - uniform + - view + - where + - zero + - add + - any + - arange + - as_strided + - flip + - tril + - triu + - cat + - log_softmax + - softmax + - scatter + - gather + - max_pool2d_with_indices_backward + - nn.functional.embedding + - nn.functional.unfold + + diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index 86940f6c95cdc..e4455a0b912ed 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -702,8 +702,8 @@ def get_desired_device_type_test_bases(except_for=None, only_for=None, include_l test_bases = device_type_test_bases.copy() if allow_mps and TEST_MPS and MPSTestBase not in test_bases: test_bases.append(MPSTestBase) - if only_for == 'xpu' and TEST_XPU and XPUTestBase not in test_bases: - test_bases.append(XPUTestBase) + if (only_for == 'xpu' or 'xpu' in os.getenv(PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY)) and TEST_XPU and XPUTestBase not in test_bases: + test_bases.append(XPUTestBase) # Filter out the device types based on user inputs desired_device_type_test_bases = filter_desired_device_types(test_bases, except_for, only_for) if include_lazy: @@ -1170,15 +1170,20 @@ def efail_fn(slf, *args, **kwargs): class onlyOn: - def __init__(self, device_type): - self.device_type = device_type + def __init__(self, device_type: Union[str, List[str]]): + self.device_types = [] + if isinstance(device_type, str): + self.device_types.append(device_type) + else: + assert isinstance(device_type, list) + self.device_types = device_type def __call__(self, fn): @wraps(fn) def only_fn(slf, *args, **kwargs): - if self.device_type != slf.device_type: - reason = f"Only runs on {self.device_type}" + if slf.device_type not in self.device_types: + reason = f"Only runs on {self.device_types}" raise unittest.SkipTest(reason) return fn(slf, *args, **kwargs) @@ -1375,15 +1380,7 @@ def only_fn(self, *args, **kwargs): return only_fn def onlyCUDAAndXPU(fn): - @wraps(fn) - def only_fn(self, *args, **kwargs): - if self.device_type not in ('cuda', 'xpu'): - reason = f"onlyCUDAAndXPU: doesn't run on {self.device_type}" - raise unittest.SkipTest(reason) - - return fn(self, *args, **kwargs) - - return only_fn + return onlyOn(['cuda', 'xpu'])(fn) def disablecuDNN(fn): diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 872fca9514c8f..8c4db5c3b33cd 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -37,7 +37,7 @@ TEST_WITH_ROCM, IS_WINDOWS, IS_MACOS, TEST_SCIPY, torch_to_numpy_dtype_dict, numpy_to_torch_dtype, TEST_WITH_ASAN, GRADCHECK_NONDET_TOL, freeze_rng_state, slowTest, TEST_WITH_SLOW, - TEST_WITH_TORCHINDUCTOR + TEST_WITH_TORCHINDUCTOR, TEST_XPU ) import torch._refs as refs # noqa: F401 @@ -9515,6 +9515,7 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=floating_and_complex_types(), dtypesIfCUDA=floating_and_complex_types_and(torch.half,), + 
dtypesIfXPU=floating_and_complex_types_and(torch.half,), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9533,6 +9534,7 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=floating_and_complex_types(), dtypesIfCUDA=floating_and_complex_types_and(torch.half,), + dtypesIfXPU=floating_and_complex_types_and(torch.half,), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9584,6 +9586,7 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): backward_requires_result=True, dtypes=floating_and_complex_types(), dtypesIfCUDA=floating_and_complex_types_and(torch.half,), + dtypesIfXPU=floating_and_complex_types_and(torch.half,), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9603,6 +9606,7 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=floating_and_complex_types(), dtypesIfCUDA=floating_and_complex_types_and(torch.half,), + dtypesIfXPU=floating_and_complex_types_and(torch.half,), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9629,6 +9633,7 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=floating_and_complex_types_and(torch.bfloat16), dtypesIfCUDA=floating_and_complex_types_and(torch.half), + dtypesIfXPU=floating_and_complex_types_and(torch.half), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9712,6 +9717,7 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=floating_types_and(torch.bfloat16), dtypesIfCUDA=floating_types_and(torch.half), + dtypesIfXPU=floating_types_and(torch.half), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9741,6 +9747,7 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=all_types_and_complex_and(torch.bfloat16, torch.half), dtypesIfCUDA=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool), + dtypesIfXPU=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9756,6 +9763,7 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=all_types_and_complex_and(torch.bfloat16, torch.half), dtypesIfCUDA=all_types_and_complex_and(torch.bfloat16, torch.half), + dtypesIfXPU=all_types_and_complex_and(torch.bfloat16, torch.half), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -10455,8 +10463,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_sparse_csc=True, supports_sparse_bsr=True, supports_sparse_bsc=True, - supports_forward_ad=True, - enable_skipped_device=('xpu',)), + supports_forward_ad=True), # NOTE: CPU complex acos produces incorrect outputs (https://github.com/pytorch/pytorch/issues/42952) UnaryUfuncInfo('acos', aliases=('arccos', ), @@ -10564,9 +10571,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): 'TestBinaryUfuncs', 'test_reference_numerics_extremal_values', dtypes=(torch.complex64, torch.complex128)), - ), - 
enable_skipped_device=('xpu',), - ), + )), OpInfo('item', op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.item, inp, *args, **kwargs), ref=np.ndarray.item, @@ -10588,9 +10593,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake_autocast'), # Booleans mismatch: AssertionError: False is not true DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake'), - ), - enable_skipped_device=('xpu',), - ), + )), OpInfo('arange', dtypes=all_types_and(torch.bfloat16, torch.float16), supports_out=True, @@ -10626,8 +10629,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # UserWarning not triggered : Resized a non-empty tensor but did not warn about it. DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'), - ), - enable_skipped_device=('xpu',)), + )), OpInfo('cauchy', op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.cauchy_, inp, *args, **kwargs), inplace_variant=torch.Tensor.cauchy_, @@ -10817,8 +10819,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestLazyOpInfo', 'test_dispatched_to_lazy'), # test error disabled since rhs non-tensor python scalar is supported DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_errors'), - ), - enable_skipped_device=('xpu',),), + )), BinaryUfuncInfo('clamp_min', ref=_clamp_min_numpy, dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16), @@ -10836,8 +10837,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestLazyOpInfo', 'test_dispatched_to_lazy'), # test error disabled since rhs non-tensor python scalar is supported DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_errors'), - ), - enable_skipped_device=('xpu',)), + )), BinaryUfuncInfo('mul', aliases=('multiply',), dtypes=all_types_and_complex_and(torch.chalf, torch.float16, torch.bfloat16, torch.bool), @@ -10850,9 +10850,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): sample_inputs_sparse_csr_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_csr), sample_inputs_sparse_csc_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_csc), sample_inputs_sparse_bsr_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_bsr), - sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_bsc), - enable_skipped_device=('xpu',)), - + sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_bsc)), BinaryUfuncInfo('sub', # NumPy has no builtin reference for the alpha kwarg, but it is easy enough to emulate ref=lambda input, other, *, alpha=1: np.subtract(input, np.multiply(alpha, other)), @@ -10888,8 +10886,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): 'TestBinaryUfuncs', 'test_reference_numerics_small_values', dtypes=(torch.uint8,)), - ), - enable_skipped_device=('xpu',)), + )), OpInfo('addmm', # This addmm OpInfo is for when alpha and beta are not both equal to 1. 
# alpha=beta=1 is tested in the following opinfo, because that special case will @@ -11331,8 +11328,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ref=np.bitwise_not, dtypes=integral_types_and(torch.bool), operator_variant=operator.invert, - supports_autograd=False, - enable_skipped_device=('xpu',)), + supports_autograd=False), BinaryUfuncInfo('bitwise_left_shift', op=torch.bitwise_left_shift, dtypes=integral_types(), @@ -11467,8 +11463,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # (NumPy reference needs to be extended with memory_format) DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref'), DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref_mps'), - ), - enable_skipped_device=('xpu',)), + ),), OpInfo('contiguous', op=lambda x, *args, **kwargs: x.contiguous(*args, **kwargs), dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf), @@ -11510,8 +11505,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): 'TestNNCOpInfo', 'test_nnc_correctness', dtypes=(torch.bool,)), - ), - enable_skipped_device=('xpu',)), + )), UnaryUfuncInfo('positive', ref=np.positive, dtypes=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf), @@ -11575,7 +11569,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_view_as_real, test_conjugated_samples=False, - enable_skipped_device=('xpu',), ), OpInfo('view_as_complex', dtypes=floating_types_and(torch.half), @@ -11591,8 +11584,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness', dtypes=(torch.half,)), # RuntimeError: view size is not compatible with input tensor's size and stride DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides"), - ), - enable_skipped_device=('xpu',)), + )), BinaryUfuncInfo('complex', dtypes=floating_types_and(torch.half), supports_forward_ad=True, @@ -11631,7 +11623,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ref=np.cos, dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16), - dtypesIfXPU=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16), assert_autodiffed=True, handles_large_floats=False, supports_forward_ad=True, @@ -11655,8 +11646,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestUnaryUfuncs', 'test_reference_numerics_large', device_type='cuda', dtypes=(torch.chalf,), active_if=IS_WINDOWS), - ), - enable_skipped_device=('xpu',)), + )), UnaryUfuncInfo('cosh', ref=np_unary_ufunc_integer_promotion_wrapper(np.cosh), dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), @@ -11731,8 +11721,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # cumsum does not handle correctly out= dtypes DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'), ), - sample_inputs_func=sample_inputs_cumulative_ops, - enable_skipped_device=('xpu',)), + sample_inputs_func=sample_inputs_cumulative_ops), OpInfo('cumprod', dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16), supports_forward_ad=True, @@ -11804,9 +11793,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_fwgrad_bwgrad=True, supports_two_python_scalars=True, assert_autodiffed=True, - 
rhs_make_tensor_kwargs=dict(exclude_zero=True), - enable_skipped_device=('xpu',),), - + rhs_make_tensor_kwargs=dict(exclude_zero=True),), BinaryUfuncInfo('div', aliases=('divide',), variant_test_name='trunc_rounding', @@ -11826,8 +11813,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): skips=( # RuntimeError: MALFORMED INPUT: Unhandled node kind (in computeValue): aten::div DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_working'), - ), - enable_skipped_device=('xpu',)), + )), BinaryUfuncInfo('div', aliases=('divide',), variant_test_name='floor_rounding', @@ -11847,8 +11833,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): skips=( # RuntimeError: MALFORMED INPUT: Unhandled node kind (in computeValue): aten::div DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_working'), - ), - enable_skipped_device=('xpu',)), + )), BinaryUfuncInfo('true_divide', dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), @@ -11949,8 +11934,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): supports_autograd=False, sample_inputs_func=sample_inputs_comparison_ops, skips=( - ), - enable_skipped_device=('xpu',)), + )), BinaryUfuncInfo('fmax', op=torch.fmax, dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool), @@ -11995,12 +11979,12 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_reference_numerics_small_values', dtypes=(torch.uint8,)), - ), - enable_skipped_device=('xpu',)), + )), BinaryUfuncInfo('remainder', ref=np.remainder, dtypes=all_types_and(torch.float16, torch.bfloat16), dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16), + dtypesIfXPU=all_types_and(torch.float16, torch.bfloat16), # https://github.com/pytorch/pytorch/issues/80411 gradcheck_fast_mode=True, supports_forward_ad=True, @@ -13033,8 +13017,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # RuntimeError: "bitwise_and_cuda" not implemented for 'Half' DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion', device_type='cuda'), - ), - enable_skipped_device=('xpu',)), + )), BinaryUfuncInfo('bitwise_or', ref=np.bitwise_or, dtypes=integral_types_and(torch.bool), @@ -13048,8 +13031,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): 'TestBinaryUfuncs', 'test_type_promotion', device_type='cuda'), - ), - enable_skipped_device=('xpu',)), + )), BinaryUfuncInfo('bitwise_xor', ref=np.bitwise_xor, dtypes=integral_types_and(torch.bool), @@ -13063,8 +13045,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): 'TestBinaryUfuncs', 'test_type_promotion', device_type='cuda'), - ), - enable_skipped_device=('xpu',)), + )), BinaryUfuncInfo('heaviside', ref=lambda a, b: ( # necessary because np.heaviside incorrectly returns float64 when passed args of dtype int64 @@ -13098,9 +13079,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_reference_numerics_small_values', - dtypes=(torch.int8,)),), - enable_skipped_device=('xpu',) - ), + dtypes=(torch.int8,)),)), BinaryUfuncInfo('isclose', ref=np.isclose, dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), @@ -13289,6 +13268,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_meta_inplace'), DecorateInfo(unittest.expectedFailure, 'TestMeta', 
'test_dispatch_symbolic_meta_inplace'), DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_inplace_all_strides'), + DecorateInfo(unittest.skip("Not support XPU"), 'TestCompositeCompliance', 'test_operator', device_type='xpu', dtypes=None), )), OpInfo('as_strided_scatter', dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf), @@ -13886,8 +13866,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # RuntimeError: Cannot insert a Tensor that requires grad as a constant. # Consider making it a parameter or input, or detaching the gradient DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32,)), - DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_forward_ad', - active_if=TEST_WITH_ROCM) ], sample_inputs_func=sample_inputs_instance_norm, supports_expanded_weight=True,), @@ -14036,6 +14014,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): aten_name='im2col', dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16), dtypesIfCUDA=floating_and_complex_types_and(torch.half, torch.bfloat16), + dtypesIfXPU=floating_and_complex_types_and(torch.half, torch.bfloat16), sample_inputs_func=sample_inputs_nn_unfold, # Runs very slowly on slow gradcheck - alternatively reduce input sizes gradcheck_fast_mode=True, @@ -15208,10 +15187,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness'), DecorateInfo(unittest.skip('Skipped!'), 'TestNNCOpInfo', 'test_nnc_correctness', device_type='cpu', dtypes=(torch.bfloat16, torch.float16)), - # Trying to use forward AD with miopen_batch_norm that does not support it - # because it has not been implemented yet. - DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_forward_ad', - device_type="cuda", active_if=TEST_WITH_ROCM), DecorateInfo(toleranceOverride({torch.float32: tol(atol=5e-05, rtol=1e-05)}), 'TestCompositeCompliance', 'test_forward_ad', device_type="cpu"), )), @@ -15415,8 +15390,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # May not replicate in CI DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out'), DecorateInfo(unittest.skip("Unsupported on MPS for now"), 'TestCommon', 'test_numpy_ref_mps'), - ), - enable_skipped_device=('xpu',)), + )), UnaryUfuncInfo('nn.functional.relu6', aten_name="relu6", dtypes=all_types_and(torch.half, torch.bfloat16), @@ -15586,12 +15560,14 @@ def reference_flatten(input, start_dim=0, end_dim=-1): BinaryUfuncInfo('pow', dtypes=all_types_and_complex_and(torch.half, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf), + dtypesIfXPU=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf), ref=np.power, # Due to AVX2 currently not being fully supported for Float16, log_vml_cpu can't be enabled # for Float16, causing this test to fail. pow's autograd for Float16 is thus currently # unsupported on CPU. 
backward_dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16), backward_dtypesIfCUDA=floating_and_complex_types_and(torch.bfloat16, torch.half, torch.chalf), + backward_dtypesIfXPU=floating_and_complex_types_and(torch.bfloat16, torch.half, torch.chalf), # https://github.com/pytorch/pytorch/issues/80411 gradcheck_fast_mode=True, supports_inplace_autograd=False, @@ -16393,6 +16369,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): domain=(0, None), dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16), + dtypesIfXPU=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16), decorators=(precisionOverride({torch.half: 5e-2}),), assert_autodiffed=True, supports_forward_ad=True, @@ -16871,6 +16848,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): OpInfo('gather', dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), + dtypesIfXPU=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), sample_inputs_func=sample_inputs_gather, gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, supports_forward_ad=True, @@ -16905,6 +16883,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): OpInfo('index_select', dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf), backward_dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16, torch.chalf), + backward_dtypesIfXPU=floating_and_complex_types_and(torch.float16, torch.bfloat16, torch.chalf), sample_inputs_func=sample_inputs_index, reference_inputs_func=partial(sample_inputs_index, reference=True), error_inputs_func=error_inputs_index_select, @@ -17259,8 +17238,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): sample_inputs_sparse_bsr_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_bsr), sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_bsc), skips=( - ), - enable_skipped_device=('xpu',)), + )), OpInfo('ones_like', dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), supports_out=False, @@ -17437,8 +17415,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # UserWarning not triggered : Resized a non-empty tensor but did not warn about it. 
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'), - ), - enable_skipped_device=('xpu',)), + )), OpInfo('full', op=torch.full, supports_autograd=False, @@ -17603,8 +17580,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.skip("Expected: empty is not comparable"), 'TestCommon', 'test_complex_half_reference_testing'), DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'), - ), - enable_skipped_device=('xpu',)), + )), OpInfo('eye', dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), sample_inputs_func=sample_inputs_eye, @@ -17725,6 +17701,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): inplace_variant=None, dtypes=floating_types_and(torch.bfloat16, torch.half), dtypesIfCUDA=floating_types_and(torch.bfloat16, torch.half), + dtypesIfXPU=floating_types_and(torch.bfloat16, torch.half), supports_out=True, sample_inputs_func=sample_inputs_normal_tensor_first, skips=( @@ -17754,6 +17731,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): inplace_variant=None, dtypes=floating_types_and(torch.bfloat16, torch.half), dtypesIfCUDA=floating_types_and(torch.bfloat16, torch.half), + dtypesIfXPU=floating_types_and(torch.bfloat16, torch.half), supports_out=True, sample_inputs_func=sample_inputs_normal_tensor_second, skips=( @@ -17802,8 +17780,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'), # UserWarning not triggered : Resized a non-empty tensor but did not warn about it. DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'), - DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu')), - enable_skipped_device=('xpu',)), + DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'))), OpInfo('scatter_add', dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), sample_inputs_func=sample_inputs_scatter_add, @@ -18052,8 +18029,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): sample_inputs_func=sample_repeat_tile, skips=( DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"), - ), - enable_skipped_device=('xpu',)), + )), OpInfo('squeeze', ref=_squeeze_ref, dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf), @@ -18102,8 +18078,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'), DecorateInfo(unittest.skip("No fill_ op"), 'TestCudaFuserOpInfo'), DecorateInfo(unittest.skip("No fill_ op"), 'TestNNCOpInfo'), - ), - enable_skipped_device=('xpu',)), + )), OpInfo('resize_', op=lambda x, shape: x.clone().resize_(shape), method_variant=None, @@ -18120,8 +18095,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"), DecorateInfo(unittest.skip("Allowed exception"), 'TestCompositeCompliance', 'test_operator'), ), - sample_inputs_func=sample_inputs_resize_ops, - enable_skipped_device=('xpu',)), + sample_inputs_func=sample_inputs_resize_ops), OpInfo('resize_as_', op=lambda x, other: torch.resize_as_(x.clone(), other), method_variant=None, @@ -18134,8 +18108,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_dtypes'), DecorateInfo(unittest.skip('Allowed 
exemption'), 'TestCompositeCompliance', 'test_operator'), ), - sample_inputs_func=sample_inputs_resize_ops, - enable_skipped_device=('xpu',)), + sample_inputs_func=sample_inputs_resize_ops), OpInfo('take_along_dim', dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), @@ -18949,6 +18922,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): aten_name="native_dropout_backward", dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool), dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16), + dtypesIfXPU=floating_types_and(torch.float16, torch.bfloat16), supports_out=False, sample_inputs_func=sample_inputs_dropout_backward, skips=( @@ -19244,7 +19218,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # FIXME: uint8 input returns uint8 instead of bool DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_result_dtype', dtypes=[torch.uint8]), ), - enable_skipped_device=('xpu',), ), ReductionOpInfo( 'any', @@ -19523,6 +19496,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): promotes_int_to_int64=True, dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf), + dtypesIfXPU=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf), ref=reference_reduction_numpy(np.sum), error_inputs_sparse_func=error_inputs_sparse_reduction_sum, sample_inputs_sparse_coo_func=partial(sample_inputs_sparse_reduction_sum, layout=torch.sparse_coo), @@ -19934,6 +19908,58 @@ def reference_flatten(input, start_dim=0, end_dim=-1): op_db += opinfo.definitions.op_db +# def enable_skipped_device(op_db_list: List[OpInfo]): +# if TEST_XPU: +# import os, yaml +# device = 'xpu' +# op_db_dict = {} + +# xpu_op_db = os.getcwd() + "/xpu/xpu_op_db.yaml" if os.path.exists(os.getcwd() + "/xpu/xpu_op_db.yaml") else os.getcwd() + "../xpu/xpu_op_db.yaml" +# if os.path.exists(xpu_op_db): +# with open(xpu_op_db) as stream: +# try: +# op_db_dict = yaml.safe_load(stream) +# except yaml.YAMLError: +# print("Error in loading xpu_op_db.yaml.") + +# for op in op_db_list: +# if not op_db_dict or op.name not in op_db_dict['supported']: +# if hasattr(op, "torch_opinfo"): +# # import pdb +# # pdb.set_trace() +# torch_opinfo = getattr(op, "torch_opinfo") +# if hasattr(torch_opinfo, 'name') and torch_opinfo.name in op_db_dict['supported']: +# continue + +# if op.skips is not None: +# op.skips = (*op.skips, DecorateInfo(unittest.skip, device_type=device, dtypes=None)) +# op.decorators = (*op.decorators, DecorateInfo(unittest.skip, device_type=device, dtypes=None)) +# else: +# op.skips = (DecorateInfo(unittest.skip, device_type=device, dtypes=None)) +# op.decorators = (DecorateInfo(unittest.skip, device_type=device, dtypes=None)) +# return op_db_list + +#op_db = enable_skipped_device(op_db) + +# import os, yaml +# xpu_op_db = os.getcwd() + "/xpu/xpu_op_db.yaml" if os.path.exists(os.getcwd() + "/xpu/xpu_op_db.yaml") else os.getcwd() + "../xpu/xpu_op_db.yaml" +# if os.path.exists(xpu_op_db): +# with open(xpu_op_db) as stream: +# try: +# op_db_dict = yaml.safe_load(stream) +# except yaml.YAMLError: +# print("Error in loading xpu_op_db.yaml.") + +# for op in op_db: +# if op.name not in op_db_dict['supported']: +# if op.name == "__rpow__": +# import pdb +# pdb.set_trace() +# print("***", op.name, op.decorators) + + + + # Separate registry for experimental Python 
Reference OpInfos. python_ref_db = [ # @@ -22906,6 +22932,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ), ] python_ref_db += opinfo.definitions.python_ref_db +#python_ref_db = enable_skipped_device(python_ref_db) # Common operator groupings ops_and_refs = op_db + python_ref_db diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 6e6e1c596fd48..20c1dde4dbbff 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -35,6 +35,7 @@ import types import unittest import warnings +import yaml from collections.abc import Mapping, Sequence from contextlib import closing, contextmanager from copy import deepcopy @@ -5051,3 +5052,16 @@ def repl_frame(m): s = re.sub(r"Cannot export model.+\n\n", "", s) s = re.sub(r" +$", "", s, flags=re.M) return s + +def enable_skipped_op_dict(): + enable_op_dict = {} + if TEST_XPU: + device = 'xpu' + xpu_op_db = os.getcwd() + "/xpu/xpu_op_db.yaml" if os.path.exists(os.getcwd() + "/xpu/xpu_op_db.yaml") else os.getcwd() + "../xpu/xpu_op_db.yaml" + if os.path.exists(xpu_op_db): + with open(xpu_op_db) as stream: + try: + enable_op_dict = yaml.safe_load(stream) + except yaml.YAMLError: + print("Error in loading xpu_op_db.yaml.") + return enable_op_dict \ No newline at end of file diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index 5d1ac019c8b4b..419c338611ca6 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -33,6 +33,8 @@ TEST_WITH_ROCM, torch_to_numpy_dtype_dict, TrackedInputIter, + TEST_XPU, + enable_skipped_op_dict, ) from torch.testing._internal.opinfo import utils @@ -686,9 +688,6 @@ class OpInfo: # skip the test for a device skip_device: Tuple = tuple() - # enable the test for a device - enable_skipped_device: Tuple = tuple() - # decorators to apply to generated tests decorators: Tuple = tuple() @@ -900,6 +899,15 @@ class OpInfo: is_factory_function: bool = False + def enable_skipped_device(self): + op_db_dict = enable_skipped_op_dict() + if TEST_XPU and (not op_db_dict or self.name not in op_db_dict['supported']): + if self.skips is not None: + self.skips = (*self.skips, DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) + else: + self.skips = (DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) + + def __post_init__(self): self._original_opinfo_args = asdict(self).copy() @@ -1024,13 +1032,7 @@ def __post_init__(self): else: self.inplace_operator_variant = None - # Skip XPU test by default - self.skip_device = ('xpu',) - for device in (set(self.skip_device).difference(set(self.enable_skipped_device))): - if self.skips is not None: - self.skips = (*self.skips, DecorateInfo(unittest.skip, device_type=device, dtypes=None)) - else: - self.skips = (DecorateInfo(unittest.skip, device_type=device, dtypes=None)) + self.enable_skipped_device() self.decorators = (*self.decorators, *self.skips) From d7af50b8db1a80df966b89caf8d38995e95362ef Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Sun, 26 May 2024 02:36:21 -0700 Subject: [PATCH 07/37] remove unused skip_device field in opInfo --- torch/testing/_internal/opinfo/core.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index 419c338611ca6..d760feac3ed10 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -685,9 +685,6 @@ class OpInfo: # information about which tests to 
skip skips: Tuple = tuple() - # skip the test for a device - skip_device: Tuple = tuple() - # decorators to apply to generated tests decorators: Tuple = tuple() From d99c6a9f7a1d08cd1fb800f60c5ff5fba7580a89 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Tue, 28 May 2024 23:32:58 -0700 Subject: [PATCH 08/37] code clean up --- test/test_ops.py | 1 - torch/testing/_internal/common_device_type.py | 11 +--- .../_internal/common_methods_invocations.py | 59 ++----------------- torch/testing/_internal/common_utils.py | 2 - torch/testing/_internal/opinfo/core.py | 6 -- 5 files changed, 8 insertions(+), 71 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index ecefb53195788..3f4684bf85610 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -166,7 +166,6 @@ def tearDownClass(cls): @onlyCUDAAndXPU @deviceCountAtLeast(2) @ops(op_db, allowed_dtypes=(torch.float32, torch.long)) - #@ops(_xpu_computation_ops, dtypes=any_common_cpu_device_one) def test_multiple_devices(self, devices, dtype, op): for cuda_device_str in devices: cuda_device = torch.device(cuda_device_str) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index e4455a0b912ed..1ed3ccd672ecd 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -402,6 +402,7 @@ def instantiate_test_helper(cls, name, *, test, param_kwargs=None, decorator_fn= if hasattr(test, 'num_required_devices'): device_arg = cls.get_all_devices() _update_param_kwargs(param_kwargs, 'device', device_arg) + # Apply decorators based on param kwargs. for decorator in decorator_fn(param_kwargs): test = decorator(test) @@ -436,8 +437,7 @@ def instantiated_test(self, param_kwargs=param_kwargs): return result assert not hasattr(cls, name), f"Redefinition of test {name}" - #import pdb - #pdb.set_trace() + setattr(cls, name, instantiated_test) def default_parametrize_fn(test, generic_cls, device_cls): @@ -449,8 +449,7 @@ def default_parametrize_fn(test, generic_cls, device_cls): # If one of the @dtypes* decorators is present, also parametrize over the dtypes set by it. dtypes = cls._get_dtypes(test) - #import pdb - #pdb.set_trace() + if dtypes is not None: def dtype_parametrize_fn(test, generic_cls, device_cls, dtypes=dtypes): @@ -476,7 +475,6 @@ def dtype_parametrize_fn(test, generic_cls, device_cls, dtypes=dtypes): dtype_kwarg = param_kwargs['dtypes'] if 'dtypes' in param_kwargs else param_kwargs['dtype'] test_name = f'{name}{test_suffix}{device_suffix}{_dtype_test_suffix(dtype_kwarg)}' - print(test_name) instantiate_test_helper(cls=cls, name=test_name, test=test, param_kwargs=param_kwargs, decorator_fn=decorator_fn) @@ -1004,7 +1002,6 @@ def test_wrapper(*args, **kwargs): decorator_fn = partial(op.get_decorators, generic_cls.__name__, test.__name__, device_cls.device_type, dtype) - #print("create test {} op={} dtype={} param_kwargs={} decorator_fn={}".format(test_name, op, dtype, param_kwargs, decorator_fn)) yield (test_wrapper, test_name, param_kwargs, decorator_fn) except Exception as ex: # Provides an error message for debugging before rethrowing the exception @@ -1605,5 +1602,3 @@ def skipPRIVATEUSE1(fn): # This should probably enumerate all available device type test base classes. 
def get_all_device_types() -> List[str]: return ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda'] - - diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 8c4db5c3b33cd..9bded15ad0167 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -13268,7 +13268,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_meta_inplace'), DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_inplace'), DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_inplace_all_strides'), - DecorateInfo(unittest.skip("Not support XPU"), 'TestCompositeCompliance', 'test_operator', device_type='xpu', dtypes=None), + DecorateInfo(unittest.skip("No XPU backend support in this operation"), 'TestCompositeCompliance', 'test_operator', device_type='xpu', dtypes=None), )), OpInfo('as_strided_scatter', dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf), @@ -13866,6 +13866,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): # RuntimeError: Cannot insert a Tensor that requires grad as a constant. # Consider making it a parameter or input, or detaching the gradient DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32,)), + DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_forward_ad', + active_if=TEST_WITH_ROCM) ], sample_inputs_func=sample_inputs_instance_norm, supports_expanded_weight=True,), @@ -15187,6 +15189,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness'), DecorateInfo(unittest.skip('Skipped!'), 'TestNNCOpInfo', 'test_nnc_correctness', device_type='cpu', dtypes=(torch.bfloat16, torch.float16)), + DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_forward_ad', + device_type="cuda", active_if=TEST_WITH_ROCM), DecorateInfo(toleranceOverride({torch.float32: tol(atol=5e-05, rtol=1e-05)}), 'TestCompositeCompliance', 'test_forward_ad', device_type="cpu"), )), @@ -19908,58 +19912,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): op_db += opinfo.definitions.op_db -# def enable_skipped_device(op_db_list: List[OpInfo]): -# if TEST_XPU: -# import os, yaml -# device = 'xpu' -# op_db_dict = {} - -# xpu_op_db = os.getcwd() + "/xpu/xpu_op_db.yaml" if os.path.exists(os.getcwd() + "/xpu/xpu_op_db.yaml") else os.getcwd() + "../xpu/xpu_op_db.yaml" -# if os.path.exists(xpu_op_db): -# with open(xpu_op_db) as stream: -# try: -# op_db_dict = yaml.safe_load(stream) -# except yaml.YAMLError: -# print("Error in loading xpu_op_db.yaml.") - -# for op in op_db_list: -# if not op_db_dict or op.name not in op_db_dict['supported']: -# if hasattr(op, "torch_opinfo"): -# # import pdb -# # pdb.set_trace() -# torch_opinfo = getattr(op, "torch_opinfo") -# if hasattr(torch_opinfo, 'name') and torch_opinfo.name in op_db_dict['supported']: -# continue - -# if op.skips is not None: -# op.skips = (*op.skips, DecorateInfo(unittest.skip, device_type=device, dtypes=None)) -# op.decorators = (*op.decorators, DecorateInfo(unittest.skip, device_type=device, dtypes=None)) -# else: -# op.skips = (DecorateInfo(unittest.skip, device_type=device, dtypes=None)) -# op.decorators = (DecorateInfo(unittest.skip, device_type=device, dtypes=None)) -# return 
op_db_list - -#op_db = enable_skipped_device(op_db) - -# import os, yaml -# xpu_op_db = os.getcwd() + "/xpu/xpu_op_db.yaml" if os.path.exists(os.getcwd() + "/xpu/xpu_op_db.yaml") else os.getcwd() + "../xpu/xpu_op_db.yaml" -# if os.path.exists(xpu_op_db): -# with open(xpu_op_db) as stream: -# try: -# op_db_dict = yaml.safe_load(stream) -# except yaml.YAMLError: -# print("Error in loading xpu_op_db.yaml.") - -# for op in op_db: -# if op.name not in op_db_dict['supported']: -# if op.name == "__rpow__": -# import pdb -# pdb.set_trace() -# print("***", op.name, op.decorators) - - - - # Separate registry for experimental Python Reference OpInfos. python_ref_db = [ # @@ -22932,7 +22884,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ), ] python_ref_db += opinfo.definitions.python_ref_db -#python_ref_db = enable_skipped_device(python_ref_db) # Common operator groupings ops_and_refs = op_db + python_ref_db diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 20c1dde4dbbff..c44b883aaf0c1 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -390,7 +390,6 @@ def composite_fn(test, generic_cls, device_cls, old_parametrize_fn=old_parametrize_fn, new_parametrize_fn=new_parametrize_fn): old_tests = list(old_parametrize_fn(test, generic_cls, device_cls)) - for (old_test, old_test_name, old_param_kwargs, old_dec_fn) in old_tests: for (new_test, new_test_name, new_param_kwargs, new_dec_fn) in \ new_parametrize_fn(old_test, generic_cls, device_cls): @@ -405,7 +404,6 @@ def composite_fn(test, generic_cls, device_cls, old_test_name) def merged_decorator_fn(param_kwargs, old_dec_fn=old_dec_fn, new_dec_fn=new_dec_fn): - return list(old_dec_fn(param_kwargs)) + list(new_dec_fn(param_kwargs)) yield (new_test, merged_test_name, full_param_kwargs, merged_decorator_fn) diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index d760feac3ed10..c9ceec66df913 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -98,14 +98,12 @@ def __init__( self.dtypes = dtypes self.active_if = active_if - # Validate dtypes if self.dtypes is not None: for dtype in self.dtypes: assert isinstance(dtype, torch.dtype) def is_active(self, cls_name, test_name, device_type, dtype, param_kwargs): - return ( self.active_if and (self.cls_name is None or self.cls_name == cls_name) @@ -1165,9 +1163,6 @@ def __post_init__(self): self.aliases = tuple(AliasInfo(a) for a in self.aliases) # type: ignore[assignment] else: self.aliases = () - - - def __call__(self, *args, **kwargs): """Calls the function variant of the operator.""" @@ -1372,7 +1367,6 @@ def sample_inputs_sparse_bsc(self, device, dtype, requires_grad=False, **kwargs) def get_decorators(self, test_class, test_name, device, dtype, param_kwargs): """Returns the decorators targeting the given test.""" result = [] - for decorator in self.decorators: if isinstance(decorator, DecorateInfo): if decorator.is_active( From 0a227700fb36aebba5b3735fa76fc429017c83a2 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Tue, 28 May 2024 23:39:30 -0700 Subject: [PATCH 09/37] further cleanup --- torch/testing/_internal/common_methods_invocations.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 9bded15ad0167..9b764dd02dafe 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ 
b/torch/testing/_internal/common_methods_invocations.py @@ -15189,6 +15189,8 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness'), DecorateInfo(unittest.skip('Skipped!'), 'TestNNCOpInfo', 'test_nnc_correctness', device_type='cpu', dtypes=(torch.bfloat16, torch.float16)), + # Trying to use forward AD with miopen_batch_norm that does not support it + # because it has not been implemented yet. DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_forward_ad', device_type="cuda", active_if=TEST_WITH_ROCM), DecorateInfo(toleranceOverride({torch.float32: tol(atol=5e-05, rtol=1e-05)}), From 47055814b3353a5ae974cf86a5fff1c60a835d41 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Thu, 30 May 2024 19:54:15 -0700 Subject: [PATCH 10/37] instead of do unittest.skip for xpu unsupported op in OpInfo, move it to individual test file. added allow_xpu for xpu supported tests. --- test/test_ops.py | 18 +++-- test/xpu/xpu_op_db.yaml | 48 ++++++++++--- torch/testing/_internal/common_device_type.py | 11 +-- .../_internal/common_methods_invocations.py | 69 ++++++++++++------- torch/testing/_internal/common_utils.py | 2 +- torch/testing/_internal/opinfo/core.py | 37 +++++----- 6 files changed, 124 insertions(+), 61 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 3f4684bf85610..9430977d0001c 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -56,6 +56,7 @@ SpectralFuncInfo, UnaryUfuncInfo, xfail, + enable_skipped_device, ) from torch.testing._internal.common_utils import ( @@ -87,6 +88,10 @@ assert torch.get_default_dtype() == torch.float32 + +enable_skipped_device(op_db) +enable_skipped_device(python_ref_db) + # variant testing is only done with torch.float and torch.cfloat to avoid # excessive test times and maximize signal to noise ratio _variant_ops = partial( @@ -109,7 +114,7 @@ ) - +my_op_list = [op for op in python_ref_db if op.name in ['_refs.cos',]] def reduction_dtype_filter(op): if ( @@ -2677,12 +2682,13 @@ def test_strided_layout(self, device, dtype, op): self.assertEqual(strided_result.layout, torch.strided) -instantiate_device_type_tests(TestCommon, globals()) -instantiate_device_type_tests(TestCompositeCompliance, globals()) -instantiate_device_type_tests(TestMathBits, globals()) + +instantiate_device_type_tests(TestCommon, globals(), allow_xpu=True) +instantiate_device_type_tests(TestCompositeCompliance, globals(), allow_xpu=True) +instantiate_device_type_tests(TestMathBits, globals(), allow_xpu=True) instantiate_device_type_tests(TestRefsOpsInfo, globals(), only_for="cpu") -instantiate_device_type_tests(TestFakeTensor, globals()) -instantiate_device_type_tests(TestTags, globals()) +instantiate_device_type_tests(TestFakeTensor, globals(), allow_xpu=True) +instantiate_device_type_tests(TestTags, globals(), allow_xpu=True) if __name__ == "__main__": TestCase._default_dtype_check_enabled = True diff --git a/test/xpu/xpu_op_db.yaml b/test/xpu/xpu_op_db.yaml index c0833b0cd30ea..a9fddfeab1a89 100644 --- a/test/xpu/xpu_op_db.yaml +++ b/test/xpu/xpu_op_db.yaml @@ -24,7 +24,11 @@ supported: - clamp_min - clone - copy - - cos + - cos: + supported: + - complex32 + supported_backward: + - complex32 - cumsum - empty - eq @@ -39,7 +43,11 @@ supported: - index_select - isnan - le - - log + - log: + supported: + - complex32 + supported_backward: + - complex32 - lt - masked_fill - maximum @@ -52,17 +60,41 @@ supported: - nn.functional.threshold - nonzero - normal - - pow + - pow: + 
supported: + - complex32 + supported_backward: + - complex32 - reciprocal - rsub - relu - remainder - reshape - - rsqrt - - sin - - sqrt - - sum - - tanh + - rsqrt: + supported: + - complex32 + supported_backward: + - complex32 + - sin: + supported: + - complex32 + supported_backward: + - complex32 + - sqrt: + supported: + - complex32 + supported_backward: + - complex32 + - sum: + supported: + - complex32 + supported_backward: + - complex32 + - tanh: + supported: + - complex32 + supported_backward: + - complex32 - unfold - uniform - view diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index 1ed3ccd672ecd..b7e669143bf83 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -695,12 +695,12 @@ def filter_desired_device_types(device_type_test_bases, except_for=None, only_fo PYTORCH_TESTING_DEVICE_FOR_CUSTOM_KEY = 'PYTORCH_TESTING_DEVICE_FOR_CUSTOM' -def get_desired_device_type_test_bases(except_for=None, only_for=None, include_lazy=False, allow_mps=False): +def get_desired_device_type_test_bases(except_for=None, only_for=None, include_lazy=False, allow_mps=False, allow_xpu=False): # allow callers to specifically opt tests into being tested on MPS, similar to `include_lazy` test_bases = device_type_test_bases.copy() if allow_mps and TEST_MPS and MPSTestBase not in test_bases: test_bases.append(MPSTestBase) - if (only_for == 'xpu' or 'xpu' in os.getenv(PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY)) and TEST_XPU and XPUTestBase not in test_bases: + if (allow_xpu or only_for == 'xpu') and TEST_XPU and XPUTestBase not in test_bases: test_bases.append(XPUTestBase) # Filter out the device types based on user inputs desired_device_type_test_bases = filter_desired_device_types(test_bases, except_for, only_for) @@ -745,7 +745,7 @@ def split_if_not_empty(x: str): # device-specific tests (NB: this supports additional @parametrize usage). # # See note "Writing Test Templates" -def instantiate_device_type_tests(generic_test_class, scope, except_for=None, only_for=None, include_lazy=False, allow_mps=False): +def instantiate_device_type_tests(generic_test_class, scope, except_for=None, only_for=None, include_lazy=False, allow_mps=False, allow_xpu=False): # Removes the generic test class from its enclosing scope so its tests # are not discoverable. 
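# A minimal usage sketch of the allow_xpu flag added above; TestFoo/test_add are
# made-up names, not part of this series. The real opt-ins are the allow_xpu=True
# calls added to test/test_ops.py.
import torch
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_utils import TestCase, run_tests

class TestFoo(TestCase):
    def test_add(self, device):
        # 'device' is supplied per instantiated class (cpu, cuda, xpu, ...).
        x = torch.ones(2, device=device)
        self.assertEqual((x + x).sum().item(), 4.0)

# With TEST_XPU set this also generates TestFooXPU, since the instantiated class name
# is generic_test_class.__name__ + base.device_type.upper(); the generic TestFoo is
# removed from the module scope so it is not collected directly.
instantiate_device_type_tests(TestFoo, globals(), allow_xpu=True)

if __name__ == "__main__":
    run_tests()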
del scope[generic_test_class.__name__] @@ -765,7 +765,7 @@ def instantiate_device_type_tests(generic_test_class, scope, except_for=None, on generic_tests = [x for x in generic_members if x.startswith('test')] # Creates device-specific test cases - for base in get_desired_device_type_test_bases(except_for, only_for, include_lazy, allow_mps): + for base in get_desired_device_type_test_bases(except_for, only_for, include_lazy, allow_mps, allow_xpu): class_name = generic_test_class.__name__ + base.device_type.upper() # type set to Any and suppressed due to unsupport runtime class: @@ -1097,6 +1097,9 @@ def _has_sufficient_memory(device, size): device = 'cuda:0' return torch.cuda.memory.mem_get_info(device)[0] >= size + if device == 'xpu': + raise unittest.SkipTest('TODO: Memory availability checks for XPU?') + if device == 'xla': raise unittest.SkipTest('TODO: Memory availability checks for XLA?') diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 9b764dd02dafe..2673316526537 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -37,7 +37,7 @@ TEST_WITH_ROCM, IS_WINDOWS, IS_MACOS, TEST_SCIPY, torch_to_numpy_dtype_dict, numpy_to_torch_dtype, TEST_WITH_ASAN, GRADCHECK_NONDET_TOL, freeze_rng_state, slowTest, TEST_WITH_SLOW, - TEST_WITH_TORCHINDUCTOR, TEST_XPU + TEST_WITH_TORCHINDUCTOR, TEST_XPU, enable_skipped_op_dict, ) import torch._refs as refs # noqa: F401 @@ -9515,7 +9515,6 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=floating_and_complex_types(), dtypesIfCUDA=floating_and_complex_types_and(torch.half,), - dtypesIfXPU=floating_and_complex_types_and(torch.half,), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9534,7 +9533,6 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=floating_and_complex_types(), dtypesIfCUDA=floating_and_complex_types_and(torch.half,), - dtypesIfXPU=floating_and_complex_types_and(torch.half,), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9586,7 +9584,6 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): backward_requires_result=True, dtypes=floating_and_complex_types(), dtypesIfCUDA=floating_and_complex_types_and(torch.half,), - dtypesIfXPU=floating_and_complex_types_and(torch.half,), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9606,7 +9603,6 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=floating_and_complex_types(), dtypesIfCUDA=floating_and_complex_types_and(torch.half,), - dtypesIfXPU=floating_and_complex_types_and(torch.half,), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9633,7 +9629,6 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=floating_and_complex_types_and(torch.bfloat16), dtypesIfCUDA=floating_and_complex_types_and(torch.half), - dtypesIfXPU=floating_and_complex_types_and(torch.half), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9717,7 +9712,6 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): 
sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=floating_types_and(torch.bfloat16), dtypesIfCUDA=floating_types_and(torch.half), - dtypesIfXPU=floating_types_and(torch.half), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9747,7 +9741,6 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=all_types_and_complex_and(torch.bfloat16, torch.half), dtypesIfCUDA=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool), - dtypesIfXPU=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -9763,7 +9756,6 @@ def __call__(self, opinfo, device, dtype, requires_grad, **kwargs): sample_inputs_func=foreach_inputs_sample_func(1, False, False), dtypes=all_types_and_complex_and(torch.bfloat16, torch.half), dtypesIfCUDA=all_types_and_complex_and(torch.bfloat16, torch.half), - dtypesIfXPU=all_types_and_complex_and(torch.bfloat16, torch.half), supports_autograd=True, supports_inplace_autograd=True, supports_forward_ad=True, @@ -10429,7 +10421,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ref=np.abs, dtypes=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), - dtypesIfXPU=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), skips=( DecorateInfo(unittest.skip("In-place abs not supported for complex tensors"), 'TestBwdGradients', 'test_inplace_grad', dtypes=(torch.cdouble,)), @@ -11785,7 +11776,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): variant_test_name='no_rounding_mode', dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), - dtypesIfXPU=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf), # Runs very slowly on slow gradcheck - alternatively reduce input sizes gradcheck_fast_mode=True, supports_forward_ad=True, @@ -11959,7 +11949,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ref=np.fmod, dtypes=all_types_and(torch.float16, torch.bfloat16), dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16), - dtypesIfXPU=all_types_and(torch.float16, torch.bfloat16), # https://github.com/pytorch/pytorch/issues/80411 gradcheck_fast_mode=True, supports_forward_ad=True, @@ -11984,7 +11973,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): ref=np.remainder, dtypes=all_types_and(torch.float16, torch.bfloat16), dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16), - dtypesIfXPU=all_types_and(torch.float16, torch.bfloat16), # https://github.com/pytorch/pytorch/issues/80411 gradcheck_fast_mode=True, supports_forward_ad=True, @@ -14016,7 +14004,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): aten_name='im2col', dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16), dtypesIfCUDA=floating_and_complex_types_and(torch.half, torch.bfloat16), - dtypesIfXPU=floating_and_complex_types_and(torch.half, torch.bfloat16), sample_inputs_func=sample_inputs_nn_unfold, # Runs very slowly on slow gradcheck - alternatively reduce input sizes gradcheck_fast_mode=True, @@ -15566,14 +15553,12 @@ def reference_flatten(input, start_dim=0, end_dim=-1): BinaryUfuncInfo('pow', dtypes=all_types_and_complex_and(torch.half, torch.bfloat16), 
dtypesIfCUDA=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf), - dtypesIfXPU=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf), ref=np.power, # Due to AVX2 currently not being fully supported for Float16, log_vml_cpu can't be enabled # for Float16, causing this test to fail. pow's autograd for Float16 is thus currently # unsupported on CPU. backward_dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16), backward_dtypesIfCUDA=floating_and_complex_types_and(torch.bfloat16, torch.half, torch.chalf), - backward_dtypesIfXPU=floating_and_complex_types_and(torch.bfloat16, torch.half, torch.chalf), # https://github.com/pytorch/pytorch/issues/80411 gradcheck_fast_mode=True, supports_inplace_autograd=False, @@ -16375,7 +16360,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): domain=(0, None), dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16), - dtypesIfXPU=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16), decorators=(precisionOverride({torch.half: 5e-2}),), assert_autodiffed=True, supports_forward_ad=True, @@ -16854,7 +16838,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): OpInfo('gather', dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), - dtypesIfXPU=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), sample_inputs_func=sample_inputs_gather, gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, supports_forward_ad=True, @@ -16889,7 +16872,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): OpInfo('index_select', dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf), backward_dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16, torch.chalf), - backward_dtypesIfXPU=floating_and_complex_types_and(torch.float16, torch.bfloat16, torch.chalf), sample_inputs_func=sample_inputs_index, reference_inputs_func=partial(sample_inputs_index, reference=True), error_inputs_func=error_inputs_index_select, @@ -17707,7 +17689,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): inplace_variant=None, dtypes=floating_types_and(torch.bfloat16, torch.half), dtypesIfCUDA=floating_types_and(torch.bfloat16, torch.half), - dtypesIfXPU=floating_types_and(torch.bfloat16, torch.half), supports_out=True, sample_inputs_func=sample_inputs_normal_tensor_first, skips=( @@ -17737,7 +17718,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): inplace_variant=None, dtypes=floating_types_and(torch.bfloat16, torch.half), dtypesIfCUDA=floating_types_and(torch.bfloat16, torch.half), - dtypesIfXPU=floating_types_and(torch.bfloat16, torch.half), supports_out=True, sample_inputs_func=sample_inputs_normal_tensor_second, skips=( @@ -18928,7 +18908,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): aten_name="native_dropout_backward", dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool), dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16), - dtypesIfXPU=floating_types_and(torch.float16, torch.bfloat16), supports_out=False, sample_inputs_func=sample_inputs_dropout_backward, skips=( @@ -19502,7 +19481,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): promotes_int_to_int64=True, dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16), dtypesIfCUDA=all_types_and_complex_and(torch.bool, 
torch.float16, torch.bfloat16, torch.chalf), - dtypesIfXPU=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf), ref=reference_reduction_numpy(np.sum), error_inputs_sparse_func=error_inputs_sparse_reduction_sum, sample_inputs_sparse_coo_func=partial(sample_inputs_sparse_reduction_sum, layout=torch.sparse_coo), @@ -22965,3 +22943,48 @@ def skipOps(test_case_name, base_test_name, to_skip): def wrapped(fn): return fn return wrapped + +def enable_skipped_device(op_db_list: List[OpInfo]): + if TEST_XPU: + # Get the supported op and dtypes from yaml file. + op_db_dict = enable_skipped_op_dict() + supported_op_list = [list(op_dict.keys())[0] if type(op_dict) is dict else op_dict for op_dict in op_db_dict['supported']] + + for op in op_db_list: + # For refs ops get the name of the related torch_opinfo. + torch_opinfo = getattr(op, "torch_opinfo") if hasattr(op, "torch_opinfo") else None + name = torch_opinfo.name if torch_opinfo is not None else op.name + + if name not in supported_op_list: + # If the op is not supported add unittest.skip decorators. + if op.skips is not None: + op.skips = (*op.skips, DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) + op.decorators = (*op.decorators, DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) + else: + op.skips = (DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) + op.decorators = (DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) + else: + ind = supported_op_list.index(name) + + if type(op_db_dict['supported'][ind]) is dict and op_db_dict['supported'][ind][name] != None: + # If the op is supported check whether the supported dtypes is different with cuda + for _key in op_db_dict['supported'][ind][name]: + # Get the dtypes with difference + _dtypes = [getattr(torch, _dtype) if hasattr(torch, _dtype) else None for _dtype in op_db_dict['supported'][ind][name][_key]] + match _key: + case "unsupported": + op.dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) \ + if type(op.dtypesIfXPU) is set else _dispatch_dtypes(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) + case "unsupported_backward": + op.backward_dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.backward_dtypesIfXPU)) \ + if type(op.backward_dtypesIfXPU) is set else _dispatch_dtypes(filter(lambda x: (x not in _dtypes), op.backward_dtypesIfXPU)) + case "supported": + if type(op.dtypesIfXPU) is set: + op.dtypesIfXPU.update(_dtypes) + else: + op.dtypesIfXPU = _dispatch_dtypes((*op.dtypesIfXPU, *_dtypes)) + case "supported_backward": + if type(op.backward_dtypesIfXPU) is set: + op.backward_dtypesIfXPU.update(_dtypes) + else: + op.backward_dtypesIfXPU = _dispatch_dtypes((*op.backward_dtypesIfXPU, *_dtypes)) \ No newline at end of file diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index c44b883aaf0c1..95e6ddc513810 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -228,7 +228,7 @@ def maybe_load_json(filename): if os.getenv("DISABLED_TESTS_FILE", ""): disabled_tests_dict = maybe_load_json(os.getenv("DISABLED_TESTS_FILE", "")) -NATIVE_DEVICES = ('cpu', 'cuda', 'meta', 'xpu', torch._C._get_privateuse1_backend_name()) +NATIVE_DEVICES = ('cpu', 'cuda', 'xpu', 'meta', torch._C._get_privateuse1_backend_name()) check_names = ['orin', 'concord', 'galen', 'xavier', 'nano', 'jetson', 'tegra'] IS_JETSON = any(name in platform.platform() for name in check_names) diff --git a/torch/testing/_internal/opinfo/core.py 
b/torch/testing/_internal/opinfo/core.py index c9ceec66df913..24158b13ea102 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -34,7 +34,6 @@ torch_to_numpy_dtype_dict, TrackedInputIter, TEST_XPU, - enable_skipped_op_dict, ) from torch.testing._internal.opinfo import utils @@ -726,12 +725,12 @@ class OpInfo: # dtypes this function is expected to work with on CUDA dtypesIfCUDA: _dispatch_dtypes = None - # dtypes this function is expected to work with on XPU - dtypesIfXPU: _dispatch_dtypes = None - # dtypes this function is expected to work with on ROCM dtypesIfROCM: _dispatch_dtypes = None + # dtypes this function is expected to work with on XPU + dtypesIfXPU: _dispatch_dtypes = None + # backward dtypes this function is expected to work with backward_dtypes: _dispatch_dtypes = None @@ -894,13 +893,7 @@ class OpInfo: is_factory_function: bool = False - def enable_skipped_device(self): - op_db_dict = enable_skipped_op_dict() - if TEST_XPU and (not op_db_dict or self.name not in op_db_dict['supported']): - if self.skips is not None: - self.skips = (*self.skips, DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) - else: - self.skips = (DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) + def __post_init__(self): @@ -908,7 +901,12 @@ def __post_init__(self): assert self.dtypes is not None, f"OpInfo for {self.name} has no dtypes!" - dtypes_args = (self.dtypes, self.dtypesIfCUDA, self.dtypesIfROCM) + dtypes_args = ( + self.dtypes, + self.dtypesIfCUDA, + self.dtypesIfROCM, + self.dtypesIfXPU, + ) # Validates the dtypes are generated from the dispatch-related functions for dtype_list in dtypes_args: @@ -972,7 +970,8 @@ def __post_init__(self): if self.backward_dtypes is not None else self.dtypesIfXPU if self.dtypesIfXPU is not None - else self.dtypes + #else self.dtypes + else self.backward_dtypesIfCUDA ) ) @@ -986,16 +985,16 @@ def __post_init__(self): set(self.dtypesIfCUDA) if self.dtypesIfCUDA is not None else self.dtypes ) - self.dtypesIfXPU = ( - set(self.dtypesIfXPU) if self.dtypesIfXPU is not None else self.dtypes - ) - self.dtypesIfROCM = ( set(self.dtypesIfROCM) if self.dtypesIfROCM is not None else self.dtypesIfCUDA ) + self.dtypesIfXPU = ( + set(self.dtypesIfXPU) if self.dtypesIfXPU is not None else self.dtypesIfCUDA + ) + # NOTE: if the op is unspecified it is assumed to be under the torch namespace if not self.op: self.op = _getattr_qual(torch, self.name) @@ -1027,8 +1026,6 @@ def __post_init__(self): else: self.inplace_operator_variant = None - self.enable_skipped_device() - self.decorators = (*self.decorators, *self.skips) # Specifying sample inputs function without specifying the @@ -2674,6 +2671,7 @@ def __init__( dtypes=floating_types(), dtypesIfCUDA=None, dtypesIfROCM=None, + dtypesIfXPU=None, sample_inputs_func=None, **kwargs, ): @@ -2682,6 +2680,7 @@ def __init__( dtypes=dtypes, dtypesIfCUDA=dtypesIfCUDA, dtypesIfROCM=dtypesIfROCM, + dtypesIfXPU=dtypesIfXPU, sample_inputs_func=sample_inputs_func, **kwargs, ) From d9513ecfcfee7231922b329d0a7832673904871e Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Fri, 31 May 2024 00:31:38 -0700 Subject: [PATCH 11/37] refine function naming --- test/test_ops.py | 8 +++----- torch/testing/_internal/common_device_type.py | 2 -- .../_internal/common_methods_invocations.py | 15 +++++++-------- torch/testing/_internal/common_utils.py | 8 ++++---- torch/testing/_internal/opinfo/core.py | 7 +------ 5 files changed, 15 insertions(+), 25 deletions(-) diff --git a/test/test_ops.py 
b/test/test_ops.py index 9430977d0001c..96a624f9f13b0 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -56,7 +56,7 @@ SpectralFuncInfo, UnaryUfuncInfo, xfail, - enable_skipped_device, + enable_backend_test, ) from torch.testing._internal.common_utils import ( @@ -89,8 +89,8 @@ assert torch.get_default_dtype() == torch.float32 -enable_skipped_device(op_db) -enable_skipped_device(python_ref_db) +enable_backend_test(op_db) +enable_backend_test(python_ref_db) # variant testing is only done with torch.float and torch.cfloat to avoid # excessive test times and maximize signal to noise ratio @@ -114,8 +114,6 @@ ) -my_op_list = [op for op in python_ref_db if op.name in ['_refs.cos',]] - def reduction_dtype_filter(op): if ( not isinstance(op, ReductionPythonRefInfo) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index 5de4900723043..c4c13f9a6cb46 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -437,7 +437,6 @@ def instantiated_test(self, param_kwargs=param_kwargs): return result assert not hasattr(cls, name), f"Redefinition of test {name}" - setattr(cls, name, instantiated_test) def default_parametrize_fn(test, generic_cls, device_cls): @@ -449,7 +448,6 @@ def default_parametrize_fn(test, generic_cls, device_cls): # If one of the @dtypes* decorators is present, also parametrize over the dtypes set by it. dtypes = cls._get_dtypes(test) - if dtypes is not None: def dtype_parametrize_fn(test, generic_cls, device_cls, dtypes=dtypes): diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 01331a14ba0db..71471148e017c 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -37,7 +37,7 @@ TEST_WITH_ROCM, IS_WINDOWS, IS_MACOS, TEST_SCIPY, torch_to_numpy_dtype_dict, numpy_to_torch_dtype, TEST_WITH_ASAN, GRADCHECK_NONDET_TOL, slowTest, TEST_WITH_SLOW, - TEST_WITH_TORCHINDUCTOR, TEST_XPU, enable_skipped_op_dict, + TEST_WITH_TORCHINDUCTOR, TEST_XPU, get_backend_op_dict, ) from torch.testing._utils import wrapper_set_seed @@ -14406,7 +14406,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1): DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_meta_inplace'), DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_inplace'), DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_inplace_all_strides'), - DecorateInfo(unittest.skip("No XPU backend support in this operation"), 'TestCompositeCompliance', 'test_operator', device_type='xpu', dtypes=None), )), OpInfo('as_strided_scatter', dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf), @@ -24116,11 +24115,11 @@ def wrapped(fn): return fn return wrapped -def enable_skipped_device(op_db_list: List[OpInfo]): +def enable_backend_test(op_db_list: List[OpInfo]): if TEST_XPU: # Get the supported op and dtypes from yaml file. - op_db_dict = enable_skipped_op_dict() - supported_op_list = [list(op_dict.keys())[0] if type(op_dict) is dict else op_dict for op_dict in op_db_dict['supported']] + backend_op_dict = get_backend_op_dict() + supported_op_list = [list(op_dict.keys())[0] if type(op_dict) is dict else op_dict for op_dict in backend_op_dict['supported']] for op in op_db_list: # For refs ops get the name of the related torch_opinfo. 
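# A standalone sketch of the normalization done by the list comprehension above:
# entries under 'supported' are either bare op names or single-key mappings carrying
# per-dtype overrides, and only the key is needed for the membership check. The YAML
# literal is a made-up excerpt mirroring test/xpu/xpu_op_db.yaml.
import yaml

doc = yaml.safe_load("""
supported:
  - add
  - div:
      unsupported:
        - float16
        - bfloat16
  - cos:
      supported:
        - complex32
""")
supported_op_list = [
    next(iter(entry)) if isinstance(entry, dict) else entry
    for entry in doc["supported"]
]
assert supported_op_list == ["add", "div", "cos"]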
@@ -24138,11 +24137,11 @@ def enable_skipped_device(op_db_list: List[OpInfo]): else: ind = supported_op_list.index(name) - if type(op_db_dict['supported'][ind]) is dict and op_db_dict['supported'][ind][name] != None: + if type(backend_op_dict['supported'][ind]) is dict and backend_op_dict['supported'][ind][name] != None: # If the op is supported check whether the supported dtypes is different with cuda - for _key in op_db_dict['supported'][ind][name]: + for _key in backend_op_dict['supported'][ind][name]: # Get the dtypes with difference - _dtypes = [getattr(torch, _dtype) if hasattr(torch, _dtype) else None for _dtype in op_db_dict['supported'][ind][name][_key]] + _dtypes = [getattr(torch, _dtype) if hasattr(torch, _dtype) else None for _dtype in backend_op_dict['supported'][ind][name][_key]] match _key: case "unsupported": op.dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) \ diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 1243cc33ab88d..6a4f33b4baa47 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -4998,15 +4998,15 @@ def repl_frame(m): s = re.sub(r" +$", "", s, flags=re.M) return s -def enable_skipped_op_dict(): - enable_op_dict = {} +def get_backend_op_dict(): + backend_op_dict = {} if TEST_XPU: device = 'xpu' xpu_op_db = os.getcwd() + "/xpu/xpu_op_db.yaml" if os.path.exists(os.getcwd() + "/xpu/xpu_op_db.yaml") else os.getcwd() + "../xpu/xpu_op_db.yaml" if os.path.exists(xpu_op_db): with open(xpu_op_db) as stream: try: - enable_op_dict = yaml.safe_load(stream) + backend_op_dict = yaml.safe_load(stream) except yaml.YAMLError: print("Error in loading xpu_op_db.yaml.") - return enable_op_dict \ No newline at end of file + return backend_op_dict \ No newline at end of file diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index eba7496367398..e745e1be1c67b 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -5,8 +5,7 @@ import math import operator import unittest -from dataclasses import InitVar, asdict, dataclass -from typing import Dict, Optional +from dataclasses import asdict, dataclass from enum import Enum from functools import partial from itertools import product @@ -894,9 +893,6 @@ class OpInfo: is_factory_function: bool = False - - - def __post_init__(self): self._original_opinfo_args = asdict(self).copy() @@ -2503,7 +2499,6 @@ def __init__( reference_inputs_func=reference_inputs_func, **kwargs, ) - self.domain = domain self.handles_complex_extremal_values = handles_complex_extremal_values self.handles_large_floats = handles_large_floats From 70283c52f17ffbfa9f72ca5a81b34d67fdae9786 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Sat, 1 Jun 2024 20:56:46 -0700 Subject: [PATCH 12/37] skip unsupported xpu test by two means: define the unsupported dtypes in xpu_op_db.yaml or define @skipOps in test case --- test/test_ops.py | 8 ++++++++ test/xpu/xpu_op_db.yaml | 12 ++++++++---- .../_internal/common_methods_invocations.py | 14 +++++++------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 96a624f9f13b0..0d4939707c30c 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -563,6 +563,12 @@ def test_python_ref_torch_fallback(self, device, dtype, op): ], ) @skipIfTorchInductor("Takes too long for inductor") + @skipOps( + "TestCommon", "test_python_ref_executor", (('_refs.mul', '', 'xpu', (torch.complex32,), 
False),), all_opinfos=python_ref_db + ) + @skipOps( + "TestCommon", "test_python_ref_executor", (('_refs.pow', '', 'xpu', (torch.complex32,), False),), all_opinfos=python_ref_db + ) def test_python_ref_executor(self, device, dtype, op, executor): if ( TEST_WITH_ROCM @@ -647,6 +653,8 @@ def _to_tensormeta(x): error_inputs = op.error_inputs(device) for ei in error_inputs: + import pdb + pdb.set_trace() si = ei.sample_input meta_sample = si.transform(_to_tensormeta) with self.assertRaisesRegex(ei.error_type, ei.error_regex): diff --git a/test/xpu/xpu_op_db.yaml b/test/xpu/xpu_op_db.yaml index a9fddfeab1a89..9639b0a36126b 100644 --- a/test/xpu/xpu_op_db.yaml +++ b/test/xpu/xpu_op_db.yaml @@ -12,7 +12,13 @@ supported: - add - sub - mul - - div + - div: + unsupported: + - float16 + - bfloat16 + unsupported_backward: + - float16 + - bfloat16 - abs - bernoulli - bitwise_and @@ -114,6 +120,4 @@ supported: - gather - max_pool2d_with_indices_backward - nn.functional.embedding - - nn.functional.unfold - - + - nn.functional.unfold \ No newline at end of file diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 71471148e017c..fcb4111c37f0b 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -24089,8 +24089,9 @@ def skip(op_name, variant_name='', *, device_type=None, dtypes=None): return (op_name, variant_name, device_type, dtypes, False) -def skipOps(test_case_name, base_test_name, to_skip): - all_opinfos = op_db +def skipOps(test_case_name, base_test_name, to_skip, all_opinfos=op_db): + import pdb + pdb.set_trace() for xfail in to_skip: op_name, variant_name, device_type, dtypes, expected_failure = xfail matching_opinfos = [o for o in all_opinfos @@ -24142,19 +24143,18 @@ def enable_backend_test(op_db_list: List[OpInfo]): for _key in backend_op_dict['supported'][ind][name]: # Get the dtypes with difference _dtypes = [getattr(torch, _dtype) if hasattr(torch, _dtype) else None for _dtype in backend_op_dict['supported'][ind][name][_key]] - match _key: - case "unsupported": + if _key == "unsupported": op.dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) \ if type(op.dtypesIfXPU) is set else _dispatch_dtypes(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) - case "unsupported_backward": + if _key == "unsupported_backward": op.backward_dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.backward_dtypesIfXPU)) \ if type(op.backward_dtypesIfXPU) is set else _dispatch_dtypes(filter(lambda x: (x not in _dtypes), op.backward_dtypesIfXPU)) - case "supported": + if _key == "supported": if type(op.dtypesIfXPU) is set: op.dtypesIfXPU.update(_dtypes) else: op.dtypesIfXPU = _dispatch_dtypes((*op.dtypesIfXPU, *_dtypes)) - case "supported_backward": + if _key == "supported_backward": if type(op.backward_dtypesIfXPU) is set: op.backward_dtypesIfXPU.update(_dtypes) else: From 9c3b81d970a8f0ca1d156c1dfe0d7725c478477c Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Sun, 2 Jun 2024 19:24:47 -0700 Subject: [PATCH 13/37] update skipOps decorators for XPU --- test/test_ops.py | 28 ++++++++++++++----- .../_internal/common_methods_invocations.py | 3 -- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 0d4939707c30c..9e3bc874315a5 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -564,10 +564,12 @@ def test_python_ref_torch_fallback(self, device, dtype, op): ) 
@skipIfTorchInductor("Takes too long for inductor") @skipOps( - "TestCommon", "test_python_ref_executor", (('_refs.mul', '', 'xpu', (torch.complex32,), False),), all_opinfos=python_ref_db + "TestCommon", "test_python_ref_executor", \ + (('_refs.mul', '', 'xpu', (torch.complex32,), False),), all_opinfos=python_ref_db ) @skipOps( - "TestCommon", "test_python_ref_executor", (('_refs.pow', '', 'xpu', (torch.complex32,), False),), all_opinfos=python_ref_db + "TestCommon", "test_python_ref_executor", \ + (('_refs.pow', '', 'xpu', (torch.complex32,), False),), all_opinfos=python_ref_db ) def test_python_ref_executor(self, device, dtype, op, executor): if ( @@ -641,6 +643,10 @@ def test_errors_sparse(self, device, op, layout): dtypes=OpDTypes.none, ) @skipIfTorchInductor("Takes too long for inductor") + @skipOps( + "TestCommon", "test_python_ref_errors", \ + (('_refs.where', '', 'xpu', None, False),), all_opinfos=python_ref_db + ) def test_python_ref_errors(self, device, op): mode = FakeTensorMode() with mode: @@ -653,8 +659,6 @@ def _to_tensormeta(x): error_inputs = op.error_inputs(device) for ei in error_inputs: - import pdb - pdb.set_trace() si = ei.sample_input meta_sample = si.transform(_to_tensormeta) with self.assertRaisesRegex(ei.error_type, ei.error_regex): @@ -1025,9 +1029,7 @@ def _case_two_transform(t): wrong_device = "cpu" elif torch.cuda.is_available(): wrong_device = "cuda" - elif torch.xpu.is_available(): - wrong_device = "xpu" - + factory_fn_msg = ( "\n\nNOTE: If your op is a factory function (i.e., it accepts TensorOptions) you should mark its " "OpInfo with `is_factory_function=True`." @@ -1413,6 +1415,18 @@ def convert_boolean_tensors(x): @skipMeta @onlyNativeDeviceTypes @ops(ops_and_refs, dtypes=OpDTypes.none) + @skipOps( + "TestCommon", "test_dtypes", \ + (('div', 'floor_rounding', 'xpu', None, False),), all_opinfos=ops_and_refs + ) + @skipOps( + "TestCommon", "test_dtypes", \ + (('div', 'no_rounding_mode', 'xpu', None, False),), all_opinfos=ops_and_refs + ) + @skipOps( + "TestCommon", "test_dtypes", \ + (('div', 'trunc_rounding', 'xpu', None, False),), all_opinfos=ops_and_refs + ) def test_dtypes(self, device, op): # Check complex32 support only if the op claims. # TODO: Once the complex32 support is better, we should add check for complex32 unconditionally. diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index fcb4111c37f0b..3731e64b76b76 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -24090,8 +24090,6 @@ def skip(op_name, variant_name='', *, device_type=None, dtypes=None): def skipOps(test_case_name, base_test_name, to_skip, all_opinfos=op_db): - import pdb - pdb.set_trace() for xfail in to_skip: op_name, variant_name, device_type, dtypes, expected_failure = xfail matching_opinfos = [o for o in all_opinfos @@ -24126,7 +24124,6 @@ def enable_backend_test(op_db_list: List[OpInfo]): # For refs ops get the name of the related torch_opinfo. torch_opinfo = getattr(op, "torch_opinfo") if hasattr(op, "torch_opinfo") else None name = torch_opinfo.name if torch_opinfo is not None else op.name - if name not in supported_op_list: # If the op is not supported add unittest.skip decorators. 
if op.skips is not None: From fd3d73b86f0a0cc17c15536ac89b986b8e336a2e Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Mon, 3 Jun 2024 00:18:57 -0700 Subject: [PATCH 14/37] update according to comments --- torch/testing/_internal/common_device_type.py | 6 ++++++ torch/testing/_internal/common_methods_invocations.py | 4 ++-- torch/testing/_internal/common_utils.py | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index c4c13f9a6cb46..81c96e8de04a3 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -1612,3 +1612,9 @@ def skipPRIVATEUSE1(fn): # This should probably enumerate all available device type test base classes. def get_all_device_types() -> List[str]: return ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda'] + +def any_common_cpu_device_one(): + return OpDTypes.any_common_cpu_xpu_one if TEST_XPU else OpDTypes.any_common_cpu_cuda_one + +def has_gpu_device(devices: List[str]): + return "cuda" in devices or "xpu" in devices diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 3731e64b76b76..02c47ab0e189c 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -24114,8 +24114,8 @@ def wrapped(fn): return fn return wrapped -def enable_backend_test(op_db_list: List[OpInfo]): - if TEST_XPU: +def apply_op_db_for(op_db_list: List[OpInfo], device='xpu'): + if TEST_XPU and device == 'xpu': # Get the supported op and dtypes from yaml file. backend_op_dict = get_backend_op_dict() supported_op_list = [list(op_dict.keys())[0] if type(op_dict) is dict else op_dict for op_dict in backend_op_dict['supported']] diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 6a4f33b4baa47..3e955f9a420ce 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -4998,9 +4998,9 @@ def repl_frame(m): s = re.sub(r" +$", "", s, flags=re.M) return s -def get_backend_op_dict(): +def get_backend_op_dict(device='xpu'): backend_op_dict = {} - if TEST_XPU: + if TEST_XPU and device == 'xpu': device = 'xpu' xpu_op_db = os.getcwd() + "/xpu/xpu_op_db.yaml" if os.path.exists(os.getcwd() + "/xpu/xpu_op_db.yaml") else os.getcwd() + "../xpu/xpu_op_db.yaml" if os.path.exists(xpu_op_db): From f53f4d3b57c6a2cbcbcb1186e732a39dd42de319 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Mon, 3 Jun 2024 19:12:28 -0700 Subject: [PATCH 15/37] pass lintrunner --- test/test_ops.py | 69 +++++++++++-------- torch/testing/_internal/common_device_type.py | 13 ++-- .../_internal/common_methods_invocations.py | 43 +++++++----- torch/testing/_internal/common_utils.py | 7 +- 4 files changed, 78 insertions(+), 54 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 9e3bc874315a5..e96b3386ce4b1 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -28,7 +28,9 @@ from torch.testing._internal import composite_compliance, opinfo from torch.testing._internal.common_device_type import ( + any_common_cpu_device_one, deviceCountAtLeast, + has_gpu_device, instantiate_device_type_tests, onlyCPU, onlyCUDA, @@ -45,6 +47,7 @@ integral_types_and, ) from torch.testing._internal.common_methods_invocations import ( + apply_op_db_for, BinaryUfuncInfo, op_db, ops_and_refs, @@ -56,7 +59,6 @@ SpectralFuncInfo, UnaryUfuncInfo, 
xfail, - enable_backend_test, ) from torch.testing._internal.common_utils import ( @@ -88,9 +90,9 @@ assert torch.get_default_dtype() == torch.float32 - -enable_backend_test(op_db) -enable_backend_test(python_ref_db) +if TEST_XPU: + apply_op_db_for(op_db, device="xpu") + apply_op_db_for(python_ref_db, device="xpu") # variant testing is only done with torch.float and torch.cfloat to avoid # excessive test times and maximize signal to noise ratio @@ -135,11 +137,6 @@ def reduction_dtype_filter(op): aten = torch.ops.aten -def any_common_cpu_device_one(): - return OpDTypes.any_common_cpu_xpu_one if TEST_XPU else OpDTypes.any_common_cpu_cuda_one - -def has_gpu_device(devices: List[str]): - return "cuda" in devices or "xpu" in devices # Tests that apply to all operators and aren't related to any particular # system @@ -306,7 +303,6 @@ def to_cpu(arg): return arg.to(device="cpu") return arg - samples = op.reference_inputs(device, dtype) for sample in samples: @@ -564,12 +560,16 @@ def test_python_ref_torch_fallback(self, device, dtype, op): ) @skipIfTorchInductor("Takes too long for inductor") @skipOps( - "TestCommon", "test_python_ref_executor", \ - (('_refs.mul', '', 'xpu', (torch.complex32,), False),), all_opinfos=python_ref_db + "TestCommon", + "test_python_ref_executor", + (("_refs.mul", "", "xpu", (torch.complex32,), False),), + all_opinfos=python_ref_db, ) @skipOps( - "TestCommon", "test_python_ref_executor", \ - (('_refs.pow', '', 'xpu', (torch.complex32,), False),), all_opinfos=python_ref_db + "TestCommon", + "test_python_ref_executor", + (("_refs.pow", "", "xpu", (torch.complex32,), False),), + all_opinfos=python_ref_db, ) def test_python_ref_executor(self, device, dtype, op, executor): if ( @@ -644,8 +644,10 @@ def test_errors_sparse(self, device, op, layout): ) @skipIfTorchInductor("Takes too long for inductor") @skipOps( - "TestCommon", "test_python_ref_errors", \ - (('_refs.where', '', 'xpu', None, False),), all_opinfos=python_ref_db + "TestCommon", + "test_python_ref_errors", + (("_refs.where", "", "xpu", None, False),), + all_opinfos=python_ref_db, ) def test_python_ref_errors(self, device, op): mode = FakeTensorMode() @@ -824,7 +826,11 @@ def _extract_strides(out): # NOTE: only extracts on the CPU and CUDA device types since some # device types don't have storage def _extract_data_ptrs(out): - if self.device_type != "cpu" and self.device_type != "cuda" and self.device_type != "xpu": + if ( + self.device_type != "cpu" + and self.device_type != "cuda" + and self.device_type != "xpu" + ): return () if isinstance(out, torch.Tensor): @@ -952,7 +958,11 @@ def _extract_strides(out): # NOTE: only extracts on the CPU and CUDA device types since some # device types don't have storage def _extract_data_ptrs(out): - if self.device_type != "cpu" and self.device_type != "cuda" and self.device_type != "xpu": + if ( + self.device_type != "cpu" + and self.device_type != "cuda" + and self.device_type != "xpu" + ): return () if isinstance(out, torch.Tensor): @@ -1029,7 +1039,7 @@ def _case_two_transform(t): wrong_device = "cpu" elif torch.cuda.is_available(): wrong_device = "cuda" - + factory_fn_msg = ( "\n\nNOTE: If your op is a factory function (i.e., it accepts TensorOptions) you should mark its " "OpInfo with `is_factory_function=True`." 
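# A sketch of the entry layout consumed by skipOps in the hunks below: each to_skip
# element is (op_name, variant_name, device_type, dtypes, expected_failure), and the
# skip() helper shown earlier in this series builds exactly that tuple with
# expected_failure=False. The op names are taken from the decorators in this patch.
import torch
from torch.testing._internal.common_methods_invocations import skip

assert skip("div", "floor_rounding", device_type="xpu") == \
    ("div", "floor_rounding", "xpu", None, False)
assert skip("_refs.mul", device_type="xpu", dtypes=(torch.complex32,)) == \
    ("_refs.mul", "", "xpu", (torch.complex32,), False)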
@@ -1416,16 +1426,22 @@ def convert_boolean_tensors(x): @onlyNativeDeviceTypes @ops(ops_and_refs, dtypes=OpDTypes.none) @skipOps( - "TestCommon", "test_dtypes", \ - (('div', 'floor_rounding', 'xpu', None, False),), all_opinfos=ops_and_refs + "TestCommon", + "test_dtypes", + (("div", "floor_rounding", "xpu", None, False),), + all_opinfos=ops_and_refs, ) @skipOps( - "TestCommon", "test_dtypes", \ - (('div', 'no_rounding_mode', 'xpu', None, False),), all_opinfos=ops_and_refs + "TestCommon", + "test_dtypes", + (("div", "no_rounding_mode", "xpu", None, False),), + all_opinfos=ops_and_refs, ) @skipOps( - "TestCommon", "test_dtypes", \ - (('div', 'trunc_rounding', 'xpu', None, False),), all_opinfos=ops_and_refs + "TestCommon", + "test_dtypes", + (("div", "trunc_rounding", "xpu", None, False),), + all_opinfos=ops_and_refs, ) def test_dtypes(self, device, op): # Check complex32 support only if the op claims. @@ -1697,7 +1713,7 @@ def test_forward_ad(self, device, dtype, op): composite_compliance.check_forward_ad_formula( op.get_op(), args, kwargs, op.gradcheck_wrapper, self.assertEqual ) - + @skipXPU @ops(op_db, allowed_dtypes=(torch.float,)) def test_cow_input(self, device, dtype, op): @@ -2702,7 +2718,6 @@ def test_strided_layout(self, device, dtype, op): self.assertEqual(strided_result.layout, torch.strided) - instantiate_device_type_tests(TestCommon, globals(), allow_xpu=True) instantiate_device_type_tests(TestCompositeCompliance, globals(), allow_xpu=True) instantiate_device_type_tests(TestMathBits, globals(), allow_xpu=True) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index 81c96e8de04a3..a18842c98a4eb 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -700,7 +700,7 @@ def get_desired_device_type_test_bases(except_for=None, only_for=None, include_l if allow_mps and TEST_MPS and MPSTestBase not in test_bases: test_bases.append(MPSTestBase) if (allow_xpu or only_for == 'xpu') and TEST_XPU and XPUTestBase not in test_bases: - test_bases.append(XPUTestBase) + test_bases.append(XPUTestBase) # Filter out the device types based on user inputs desired_device_type_test_bases = filter_desired_device_types(test_bases, except_for, only_for) if include_lazy: @@ -744,7 +744,8 @@ def split_if_not_empty(x: str): # device-specific tests (NB: this supports additional @parametrize usage). # # See note "Writing Test Templates" -def instantiate_device_type_tests(generic_test_class, scope, except_for=None, only_for=None, include_lazy=False, allow_mps=False, allow_xpu=False): +def instantiate_device_type_tests(generic_test_class, scope, except_for=None, only_for=None, include_lazy=False, + allow_mps=False, allow_xpu=False): # Removes the generic test class from its enclosing scope so its tests # are not discoverable. 
del scope[generic_test_class.__name__] @@ -833,7 +834,7 @@ class OpDTypes(Enum): any_one = 4 # Test precisely one supported dtype none = 5 # Instantiate no dtype variants (no dtype kwarg needed) any_common_cpu_cuda_one = 6 # Test precisely one supported dtype that is common to both cuda and cpu - any_common_cpu_xpu_one = 7 # Test precisely one supported dtype that is common to both xpu and cpu + any_common_cpu_xpu_one = 7 # Test precisely one supported dtype that is common to both xpu and cpu @@ -1052,8 +1053,8 @@ def __init__(self, dep, reason): class skipXPUIf(skipIf): - def __init__(self, dep, reason): - super().__init__(dep, reason, device_type='xpu') + def __init__(self, dep, reason): + super().__init__(dep, reason, device_type='xpu') # Skips a test on Lazy if the condition is true. class skipLazyIf(skipIf): @@ -1098,7 +1099,7 @@ def _has_sufficient_memory(device, size): if device == 'xpu': raise unittest.SkipTest('TODO: Memory availability checks for XPU?') - + if device == 'xla': raise unittest.SkipTest('TODO: Memory availability checks for XLA?') diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 02c47ab0e189c..8510b238a418e 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -24118,11 +24118,12 @@ def apply_op_db_for(op_db_list: List[OpInfo], device='xpu'): if TEST_XPU and device == 'xpu': # Get the supported op and dtypes from yaml file. backend_op_dict = get_backend_op_dict() - supported_op_list = [list(op_dict.keys())[0] if type(op_dict) is dict else op_dict for op_dict in backend_op_dict['supported']] + supported_op_list = [next(iter(op_dict.keys())) if type(op_dict) is dict else op_dict + for op_dict in backend_op_dict['supported']] for op in op_db_list: # For refs ops get the name of the related torch_opinfo. - torch_opinfo = getattr(op, "torch_opinfo") if hasattr(op, "torch_opinfo") else None + torch_opinfo = op.torch_opinfo if hasattr(op, "torch_opinfo") else None name = torch_opinfo.name if torch_opinfo is not None else op.name if name not in supported_op_list: # If the op is not supported add unittest.skip decorators. 
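# A standalone sketch of the per-key dtype handling in the next hunk, reduced to the
# set() branch; the _dispatch_dtypes branch filters/extends a tuple the same way. The
# sample dtypes and the override dict are made up.
import torch

def apply_dtype_overrides(dtypes, overrides):
    # "unsupported" drops dtypes the XPU backend cannot run; "supported" adds extras
    # (e.g. complex32) that the CUDA-derived defaults do not include.
    dtypes = set(dtypes)
    dtypes -= set(overrides.get("unsupported", ()))
    dtypes |= set(overrides.get("supported", ()))
    return dtypes

assert apply_dtype_overrides(
    {torch.float32, torch.float16, torch.bfloat16},
    {"unsupported": [torch.float16, torch.bfloat16]},
) == {torch.float32}
assert torch.complex32 in apply_dtype_overrides(
    {torch.float32}, {"supported": [torch.complex32]}
)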
@@ -24135,24 +24136,30 @@ def apply_op_db_for(op_db_list: List[OpInfo], device='xpu'): else: ind = supported_op_list.index(name) - if type(backend_op_dict['supported'][ind]) is dict and backend_op_dict['supported'][ind][name] != None: + if type(backend_op_dict['supported'][ind]) is dict and backend_op_dict['supported'][ind][name] is not None: # If the op is supported check whether the supported dtypes is different with cuda for _key in backend_op_dict['supported'][ind][name]: # Get the dtypes with difference - _dtypes = [getattr(torch, _dtype) if hasattr(torch, _dtype) else None for _dtype in backend_op_dict['supported'][ind][name][_key]] + _dtypes = [getattr(torch, _dtype) if hasattr(torch, _dtype) else None + for _dtype in backend_op_dict['supported'][ind][name][_key]] if _key == "unsupported": - op.dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) \ - if type(op.dtypesIfXPU) is set else _dispatch_dtypes(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) - if _key == "unsupported_backward": - op.backward_dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.backward_dtypesIfXPU)) \ - if type(op.backward_dtypesIfXPU) is set else _dispatch_dtypes(filter(lambda x: (x not in _dtypes), op.backward_dtypesIfXPU)) - if _key == "supported": - if type(op.dtypesIfXPU) is set: - op.dtypesIfXPU.update(_dtypes) - else: - op.dtypesIfXPU = _dispatch_dtypes((*op.dtypesIfXPU, *_dtypes)) + if type(op.dtypesIfXPU) is set: + op.dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) + else: + _dispatch_dtypes(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) + if _key == "unsupported_backward": + if type(op.backward_dtypesIfXPU) is set: + op.backward_dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.backward_dtypesIfXPU)) + else: + op.backward_dtypesIfXPU = _dispatch_dtypes(filter(lambda x: (x not in _dtypes), + op.backward_dtypesIfXPU)) + if _key == "supported": + if type(op.dtypesIfXPU) is set: + op.dtypesIfXPU.update(_dtypes) + else: + op.dtypesIfXPU = _dispatch_dtypes((*op.dtypesIfXPU, *_dtypes)) if _key == "supported_backward": - if type(op.backward_dtypesIfXPU) is set: - op.backward_dtypesIfXPU.update(_dtypes) - else: - op.backward_dtypesIfXPU = _dispatch_dtypes((*op.backward_dtypesIfXPU, *_dtypes)) + if type(op.backward_dtypesIfXPU) is set: + op.backward_dtypesIfXPU.update(_dtypes) + else: + op.backward_dtypesIfXPU = _dispatch_dtypes((*op.backward_dtypesIfXPU, *_dtypes)) diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 3e955f9a420ce..abc4c3ff946f2 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -5001,12 +5001,13 @@ def repl_frame(m): def get_backend_op_dict(device='xpu'): backend_op_dict = {} if TEST_XPU and device == 'xpu': - device = 'xpu' - xpu_op_db = os.getcwd() + "/xpu/xpu_op_db.yaml" if os.path.exists(os.getcwd() + "/xpu/xpu_op_db.yaml") else os.getcwd() + "../xpu/xpu_op_db.yaml" + device = 'xpu' + xpu_op_db = os.getcwd() + "/xpu/xpu_op_db.yaml" if os.path.exists(os.getcwd() + "/xpu/xpu_op_db.yaml") \ + else os.getcwd() + "../xpu/xpu_op_db.yaml" if os.path.exists(xpu_op_db): with open(xpu_op_db) as stream: try: backend_op_dict = yaml.safe_load(stream) except yaml.YAMLError: print("Error in loading xpu_op_db.yaml.") - return backend_op_dict \ No newline at end of file + return backend_op_dict From 5a2382fc2249eeb1186f7f842a3ce34fe3fba9cf Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Tue, 4 Jun 2024 02:12:07 -0700 Subject: [PATCH 
16/37] skip mul and where --- test/test_ops.py | 36 ----- test/xpu/op_db.yaml | 74 +++++++++++ test/xpu/xpu_op_db.yaml | 123 ------------------ .../_internal/common_methods_invocations.py | 60 ++------- torch/testing/_internal/common_utils.py | 15 +-- 5 files changed, 93 insertions(+), 215 deletions(-) create mode 100644 test/xpu/op_db.yaml delete mode 100644 test/xpu/xpu_op_db.yaml diff --git a/test/test_ops.py b/test/test_ops.py index e96b3386ce4b1..01e683ab907ab 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -559,18 +559,6 @@ def test_python_ref_torch_fallback(self, device, dtype, op): ], ) @skipIfTorchInductor("Takes too long for inductor") - @skipOps( - "TestCommon", - "test_python_ref_executor", - (("_refs.mul", "", "xpu", (torch.complex32,), False),), - all_opinfos=python_ref_db, - ) - @skipOps( - "TestCommon", - "test_python_ref_executor", - (("_refs.pow", "", "xpu", (torch.complex32,), False),), - all_opinfos=python_ref_db, - ) def test_python_ref_executor(self, device, dtype, op, executor): if ( TEST_WITH_ROCM @@ -643,12 +631,6 @@ def test_errors_sparse(self, device, op, layout): dtypes=OpDTypes.none, ) @skipIfTorchInductor("Takes too long for inductor") - @skipOps( - "TestCommon", - "test_python_ref_errors", - (("_refs.where", "", "xpu", None, False),), - all_opinfos=python_ref_db, - ) def test_python_ref_errors(self, device, op): mode = FakeTensorMode() with mode: @@ -1425,24 +1407,6 @@ def convert_boolean_tensors(x): @skipMeta @onlyNativeDeviceTypes @ops(ops_and_refs, dtypes=OpDTypes.none) - @skipOps( - "TestCommon", - "test_dtypes", - (("div", "floor_rounding", "xpu", None, False),), - all_opinfos=ops_and_refs, - ) - @skipOps( - "TestCommon", - "test_dtypes", - (("div", "no_rounding_mode", "xpu", None, False),), - all_opinfos=ops_and_refs, - ) - @skipOps( - "TestCommon", - "test_dtypes", - (("div", "trunc_rounding", "xpu", None, False),), - all_opinfos=ops_and_refs, - ) def test_dtypes(self, device, op): # Check complex32 support only if the op claims. # TODO: Once the complex32 support is better, we should add check for complex32 unconditionally. diff --git a/test/xpu/op_db.yaml b/test/xpu/op_db.yaml new file mode 100644 index 0000000000000..f89fb392a3430 --- /dev/null +++ b/test/xpu/op_db.yaml @@ -0,0 +1,74 @@ +# Owner(s): ["module: intel"] +# Define the supported Aten ops in XPU backend, the dtypes are aligned with other GPUs. 
+supported_ops: + - fill + - zeros + - zeros_like + - clone + - view_as_real + - view_as_complex + - view + - resize_ + - resize_as_ + - add + - sub + - abs + - bernoulli + - bitwise_and + - bitwise_not + - bitwise_or + - bitwise_xor + - clamp + - clamp_max + - clamp_min + - clone + - copy + - cumsum + - empty + - eq + - fill + - fmod + - gcd + - ge + - gelu + - gt + - index_add + - index_put + - index_select + - isnan + - le + - lt + - masked_fill + - maximum + - minimum + - native_dropout_backward + - ne + - neg + - nn.functional.adaptive_avg_pool2d + - nn.functional.threshold + - nonzero + - normal + - reciprocal + - rsub + - relu + - remainder + - reshape + - unfold + - uniform + - view + - zero + - add + - any + - arange + - as_strided + - flip + - tril + - triu + - cat + - log_softmax + - softmax + - scatter + - gather + - max_pool2d_with_indices_backward + - nn.functional.embedding + - nn.functional.unfold \ No newline at end of file diff --git a/test/xpu/xpu_op_db.yaml b/test/xpu/xpu_op_db.yaml deleted file mode 100644 index 9639b0a36126b..0000000000000 --- a/test/xpu/xpu_op_db.yaml +++ /dev/null @@ -1,123 +0,0 @@ -backend: XPU -supported: - - fill - - zeros - - zeros_like - - clone - - view_as_real - - view_as_complex - - view - - resize_ - - resize_as_ - - add - - sub - - mul - - div: - unsupported: - - float16 - - bfloat16 - unsupported_backward: - - float16 - - bfloat16 - - abs - - bernoulli - - bitwise_and - - bitwise_not - - bitwise_or - - bitwise_xor - - clamp - - clamp_max - - clamp_min - - clone - - copy - - cos: - supported: - - complex32 - supported_backward: - - complex32 - - cumsum - - empty - - eq - - fill - - fmod - - gcd - - ge - - gelu - - gt - - index_add - - index_put - - index_select - - isnan - - le - - log: - supported: - - complex32 - supported_backward: - - complex32 - - lt - - masked_fill - - maximum - - minimum - - mul - - native_dropout_backward - - ne - - neg - - nn.functional.adaptive_avg_pool2d - - nn.functional.threshold - - nonzero - - normal - - pow: - supported: - - complex32 - supported_backward: - - complex32 - - reciprocal - - rsub - - relu - - remainder - - reshape - - rsqrt: - supported: - - complex32 - supported_backward: - - complex32 - - sin: - supported: - - complex32 - supported_backward: - - complex32 - - sqrt: - supported: - - complex32 - supported_backward: - - complex32 - - sum: - supported: - - complex32 - supported_backward: - - complex32 - - tanh: - supported: - - complex32 - supported_backward: - - complex32 - - unfold - - uniform - - view - - where - - zero - - add - - any - - arange - - as_strided - - flip - - tril - - triu - - cat - - log_softmax - - softmax - - scatter - - gather - - max_pool2d_with_indices_backward - - nn.functional.embedding - - nn.functional.unfold \ No newline at end of file diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 8510b238a418e..19785e68282c8 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -24114,52 +24114,18 @@ def wrapped(fn): return fn return wrapped +def apply_op_db_for_xpu(op_db_list: List[OpInfo]): + # Get the supported op from yaml file. + supported_op_list = get_backend_op_dict(device='xpu')['supported_ops'] + + for op in op_db_list: + # For refs ops get the name of the related torch_opinfo. 
+ torch_opinfo = op.torch_opinfo if hasattr(op, "torch_opinfo") else None + name = torch_opinfo.name if torch_opinfo is not None else op.name + if name not in supported_op_list: + # Update op_db, add unittest.skip decorators to skip the op for the backend. + op.decorators = (*op.decorators, DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) + def apply_op_db_for(op_db_list: List[OpInfo], device='xpu'): if TEST_XPU and device == 'xpu': - # Get the supported op and dtypes from yaml file. - backend_op_dict = get_backend_op_dict() - supported_op_list = [next(iter(op_dict.keys())) if type(op_dict) is dict else op_dict - for op_dict in backend_op_dict['supported']] - - for op in op_db_list: - # For refs ops get the name of the related torch_opinfo. - torch_opinfo = op.torch_opinfo if hasattr(op, "torch_opinfo") else None - name = torch_opinfo.name if torch_opinfo is not None else op.name - if name not in supported_op_list: - # If the op is not supported add unittest.skip decorators. - if op.skips is not None: - op.skips = (*op.skips, DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) - op.decorators = (*op.decorators, DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) - else: - op.skips = (DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) - op.decorators = (DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) - else: - ind = supported_op_list.index(name) - - if type(backend_op_dict['supported'][ind]) is dict and backend_op_dict['supported'][ind][name] is not None: - # If the op is supported check whether the supported dtypes is different with cuda - for _key in backend_op_dict['supported'][ind][name]: - # Get the dtypes with difference - _dtypes = [getattr(torch, _dtype) if hasattr(torch, _dtype) else None - for _dtype in backend_op_dict['supported'][ind][name][_key]] - if _key == "unsupported": - if type(op.dtypesIfXPU) is set: - op.dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) - else: - _dispatch_dtypes(filter(lambda x: (x not in _dtypes), op.dtypesIfXPU)) - if _key == "unsupported_backward": - if type(op.backward_dtypesIfXPU) is set: - op.backward_dtypesIfXPU = set(filter(lambda x: (x not in _dtypes), op.backward_dtypesIfXPU)) - else: - op.backward_dtypesIfXPU = _dispatch_dtypes(filter(lambda x: (x not in _dtypes), - op.backward_dtypesIfXPU)) - if _key == "supported": - if type(op.dtypesIfXPU) is set: - op.dtypesIfXPU.update(_dtypes) - else: - op.dtypesIfXPU = _dispatch_dtypes((*op.dtypesIfXPU, *_dtypes)) - if _key == "supported_backward": - if type(op.backward_dtypesIfXPU) is set: - op.backward_dtypesIfXPU.update(_dtypes) - else: - op.backward_dtypesIfXPU = _dispatch_dtypes((*op.backward_dtypesIfXPU, *_dtypes)) + apply_op_db_for_xpu(op_db_list) \ No newline at end of file diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index abc4c3ff946f2..1af948371c846 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -5001,13 +5001,10 @@ def repl_frame(m): def get_backend_op_dict(device='xpu'): backend_op_dict = {} if TEST_XPU and device == 'xpu': - device = 'xpu' - xpu_op_db = os.getcwd() + "/xpu/xpu_op_db.yaml" if os.path.exists(os.getcwd() + "/xpu/xpu_op_db.yaml") \ - else os.getcwd() + "../xpu/xpu_op_db.yaml" - if os.path.exists(xpu_op_db): + xpu_op_db = CI_TEST_PREFIX + "/" + device + "/op_db.yaml" + try: with open(xpu_op_db) as stream: - try: - backend_op_dict = yaml.safe_load(stream) - except yaml.YAMLError: - print("Error in 
loading xpu_op_db.yaml.") - return backend_op_dict + backend_op_dict = yaml.safe_load(stream) + except yaml.YAMLError or FileExistsError: + print("Error in loading op_db.yaml.") + return backend_op_dict \ No newline at end of file From b1c0fff3245367afa1d415d57f16ea3b0536bb1f Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Wed, 5 Jun 2024 19:21:44 -0700 Subject: [PATCH 17/37] rollback a change in ops() --- torch/testing/_internal/common_device_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index a18842c98a4eb..cd2f73c6bfacc 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -957,7 +957,7 @@ def _parametrize_test(self, test, generic_cls, device_cls): else: raise RuntimeError(f"Unknown OpDType: {self.opinfo_dtypes}") - if self.allowed_dtypes is not None and dtypes is not None: + if self.allowed_dtypes is not None: dtypes = dtypes.intersection(self.allowed_dtypes) # Construct the test name; device / dtype parts are handled outside. From 775db6ebf9a4ba48d3ce443b0fa5b8e8aea9c9a5 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Wed, 5 Jun 2024 19:57:30 -0700 Subject: [PATCH 18/37] remove unused comments --- torch/testing/_internal/opinfo/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index e745e1be1c67b..df97f6e8ebc28 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -967,7 +967,6 @@ def __post_init__(self): if self.backward_dtypes is not None else self.dtypesIfXPU if self.dtypesIfXPU is not None - #else self.dtypes else self.backward_dtypesIfCUDA ) ) From 10fd73155a6a2d898b32d357018693e4f164cb62 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Thu, 6 Jun 2024 23:17:04 -0700 Subject: [PATCH 19/37] fix lint issue --- torch/testing/_internal/opinfo/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index df97f6e8ebc28..a5cc8689a86e1 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -31,7 +31,6 @@ TEST_WITH_ROCM, torch_to_numpy_dtype_dict, TrackedInputIter, - TEST_XPU, ) from torch.testing._internal.opinfo import utils From 557aa73250ff02c67c215998137202a424379480 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Wed, 12 Jun 2024 22:20:14 -0700 Subject: [PATCH 20/37] disable bernoulli --- test/xpu/op_db.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/test/xpu/op_db.yaml b/test/xpu/op_db.yaml index f89fb392a3430..8eaa39bbb010d 100644 --- a/test/xpu/op_db.yaml +++ b/test/xpu/op_db.yaml @@ -13,7 +13,6 @@ supported_ops: - add - sub - abs - - bernoulli - bitwise_and - bitwise_not - bitwise_or From c6965a119e946d660b535a9171550c357d17d9cc Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Thu, 13 Jun 2024 23:55:04 -0700 Subject: [PATCH 21/37] disable nn.funcitonal.embedding --- test/xpu/op_db.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/xpu/op_db.yaml b/test/xpu/op_db.yaml index 8eaa39bbb010d..144513cdc7f5a 100644 --- a/test/xpu/op_db.yaml +++ b/test/xpu/op_db.yaml @@ -69,5 +69,4 @@ supported_ops: - scatter - gather - max_pool2d_with_indices_backward - - nn.functional.embedding - - nn.functional.unfold \ No newline at end of file + - nn.functional.unfold From 5607564d8fcbca3f084dc67fa720f0b0c62ee0b3 Mon Sep 
17 00:00:00 2001 From: "Deng, Daisy" Date: Sun, 23 Jun 2024 23:30:01 -0700 Subject: [PATCH 22/37] refine format --- torch/testing/_internal/common_device_type.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index 32198e7cb6a1f..5a48385f0149f 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -958,8 +958,12 @@ class OpDTypes(Enum): unsupported_backward = 3 # Test only unsupported backward dtypes any_one = 4 # Test precisely one supported dtype none = 5 # Instantiate no dtype variants (no dtype kwarg needed) - any_common_cpu_cuda_one = 6 # Test precisely one supported dtype that is common to both cuda and cpu - any_common_cpu_xpu_one = 7 # Test precisely one supported dtype that is common to both xpu and cpu + any_common_cpu_cuda_one = ( + 6 # Test precisely one supported dtype that is common to both cuda and cpu + ) + any_common_cpu_xpu_one = ( + 7 # Test precisely one supported dtype that is common to both xpu and cpu + ) # Arbitrary order From f80a48635e341998d7b43e51f4f61098b9c210d1 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Sun, 23 Jun 2024 23:55:00 -0700 Subject: [PATCH 23/37] lint format --- torch/testing/_internal/common_device_type.py | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index 5a48385f0149f..4bc3a4bdfd428 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -792,7 +792,7 @@ def get_desired_device_type_test_bases( test_bases = device_type_test_bases.copy() if allow_mps and TEST_MPS and MPSTestBase not in test_bases: test_bases.append(MPSTestBase) - if (allow_xpu or only_for == 'xpu') and TEST_XPU and XPUTestBase not in test_bases: + if (allow_xpu or only_for == "xpu") and TEST_XPU and XPUTestBase not in test_bases: test_bases.append(XPUTestBase) if TEST_HPU and HPUTestBase not in test_bases: test_bases.append(HPUTestBase) @@ -1098,7 +1098,9 @@ def _parametrize_test(self, test, generic_cls, device_cls): # Tries to pick a dtype that supports both CPU and CUDA supported = set(op.dtypes).intersection(op.dtypesIfXPU) if supported: - dtypes = {next(dtype for dtype in ANY_DTYPE_ORDER if dtype in supported)} + dtypes = { + next(dtype for dtype in ANY_DTYPE_ORDER if dtype in supported) + } else: dtypes = {} elif self.opinfo_dtypes == OpDTypes.none: @@ -1213,9 +1215,9 @@ def __init__(self, dep, reason): class skipXPUIf(skipIf): - def __init__(self, dep, reason): - super().__init__(dep, reason, device_type='xpu') + super().__init__(dep, reason, device_type="xpu") + # Skips a test on Lazy if the condition is true. class skipLazyIf(skipIf): @@ -1575,8 +1577,10 @@ def only_fn(self, *args, **kwargs): return only_fn + def onlyCUDAAndXPU(fn): - return onlyOn(['cuda', 'xpu'])(fn) + return onlyOn(["cuda", "xpu"])(fn) + def disablecuDNN(fn): @wraps(fn) @@ -1848,9 +1852,11 @@ def skipLazy(fn): def skipMeta(fn): return skipMetaIf(True, "test doesn't work with meta tensors")(fn) + def skipXPU(fn): return skipXPUIf(True, "test doesn't work with XPU tensors")(fn) + def skipXLA(fn): return skipXLAIf(True, "Marked as skipped for XLA")(fn) @@ -1870,10 +1876,16 @@ def skipPRIVATEUSE1(fn): # TODO: the "all" in the name isn't true anymore for quite some time as we have also have for example XLA and MPS now. 
# This should probably enumerate all available device type test base classes. def get_all_device_types() -> List[str]: - return ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda'] + return ["cpu"] if not torch.cuda.is_available() else ["cpu", "cuda"] + def any_common_cpu_device_one(): - return OpDTypes.any_common_cpu_xpu_one if TEST_XPU else OpDTypes.any_common_cpu_cuda_one + return ( + OpDTypes.any_common_cpu_xpu_one + if TEST_XPU + else OpDTypes.any_common_cpu_cuda_one + ) + def has_gpu_device(devices: List[str]): return "cuda" in devices or "xpu" in devices From 4c7ac905560fe7cc9dded063571f7b1a23e7e1fd Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Tue, 25 Jun 2024 20:06:12 -0700 Subject: [PATCH 24/37] fix an mkldnn blas error message --- aten/src/ATen/native/mkldnn/xpu/Blas.cpp | 4 ++++ test/xpu/test_conv.py | 2 +- test/xpu/test_gemm.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/aten/src/ATen/native/mkldnn/xpu/Blas.cpp b/aten/src/ATen/native/mkldnn/xpu/Blas.cpp index 6cba3f4c9fa18..9dcd6f6bc10c9 100644 --- a/aten/src/ATen/native/mkldnn/xpu/Blas.cpp +++ b/aten/src/ATen/native/mkldnn/xpu/Blas.cpp @@ -131,6 +131,10 @@ Tensor& mm_out(const Tensor& self, const Tensor& mat2, Tensor& result) { "x", mat2.sizes()[1], ")"); + TORCH_CHECK( + self.dtype() == mat2.dtype(), + "expected mat1 and mat2 to have the same dtype, but got: ", self.dtype(), " != ", mat2.dtype() + ) result.resize_({self.size(0), mat2.size(1)}); if (self.numel() == 0 || mat2.numel() == 0) { diff --git a/test/xpu/test_conv.py b/test/xpu/test_conv.py index f3d4375213f02..632d4a356d286 100644 --- a/test/xpu/test_conv.py +++ b/test/xpu/test_conv.py @@ -1264,7 +1264,7 @@ def test_channels_last_ouput_stride(self, device, dtype): assert_size_stride(out, (2, 512, 7, 7), (25088, 1, 3584, 512)) -instantiate_device_type_tests(TestConvolutionNNDeviceType, globals(), only_for="xpu") +instantiate_device_type_tests(TestConvolutionNNDeviceType, globals(), only_for="xpu", allow_xpu=True) if __name__ == "__main__": run_tests() diff --git a/test/xpu/test_gemm.py b/test/xpu/test_gemm.py index 0157677a582f2..2bc6d09eeea73 100644 --- a/test/xpu/test_gemm.py +++ b/test/xpu/test_gemm.py @@ -1142,7 +1142,7 @@ def test_matmul_out_kernel_errors_with_autograd(self, device, dtype): torch.matmul(a, b, out=c) -instantiate_device_type_tests(TestBasicGEMM, globals(), only_for="xpu") +instantiate_device_type_tests(TestBasicGEMM, globals(), only_for="xpu", allow_xpu=True) if __name__ == "__main__": run_tests() From a3c3b038ba3ec98cbc7958032b0f5d9a0bf94d2c Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Tue, 23 Jul 2024 14:30:36 +0000 Subject: [PATCH 25/37] reverted skipOps interface and renamed get_backend_op_dict --- torch/testing/_internal/common_methods_invocations.py | 7 ++++--- torch/testing/_internal/common_utils.py | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index b50aea6067d79..c09ec878ddee9 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -37,7 +37,7 @@ TEST_WITH_ROCM, IS_WINDOWS, IS_MACOS, TEST_SCIPY, torch_to_numpy_dtype_dict, numpy_to_torch_dtype, TEST_WITH_ASAN, GRADCHECK_NONDET_TOL, slowTest, TEST_WITH_SLOW, - TEST_WITH_TORCHINDUCTOR, TEST_XPU, get_backend_op_dict, + TEST_WITH_TORCHINDUCTOR, TEST_XPU, get_backend_ops, ) from torch.testing._utils import wrapper_set_seed @@ -24492,7 
+24492,8 @@ def skip(op_name, variant_name='', *, device_type=None, dtypes=None): return (op_name, variant_name, device_type, dtypes, False) -def skipOps(test_case_name, base_test_name, to_skip, all_opinfos=op_db): +def skipOps(test_case_name, base_test_name, to_skip): + all_opinfos = op_db for xfail in to_skip: op_name, variant_name, device_type, dtypes, expected_failure = xfail matching_opinfos = [o for o in all_opinfos @@ -24519,7 +24520,7 @@ def wrapped(fn): def apply_op_db_for_xpu(op_db_list: List[OpInfo]): # Get the supported op from yaml file. - supported_op_list = get_backend_op_dict(device='xpu')['supported_ops'] + supported_op_list = get_backend_ops(device='xpu')['supported_ops'] for op in op_db_list: # For refs ops get the name of the related torch_opinfo. diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index bff3501cb43f3..d7ee7a6677e9d 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -5120,13 +5120,13 @@ def repl_frame(m): s = re.sub(r" +$", "", s, flags=re.MULTILINE) return s -def get_backend_op_dict(device='xpu'): - backend_op_dict = {} +def get_backend_ops(device='xpu'): + backend_ops = {} if TEST_XPU and device == 'xpu': xpu_op_db = CI_TEST_PREFIX + "/" + device + "/op_db.yaml" try: with open(xpu_op_db) as stream: - backend_op_dict = yaml.safe_load(stream) + backend_ops = yaml.safe_load(stream) except yaml.YAMLError or FileExistsError: print("Error in loading op_db.yaml.") - return backend_op_dict \ No newline at end of file + return backend_ops \ No newline at end of file From 0b3d57bb0ef423f41341c145b5a98ddc13b57289 Mon Sep 17 00:00:00 2001 From: Daisy Deng Date: Tue, 8 Oct 2024 01:54:49 +0000 Subject: [PATCH 26/37] retrigger checks From af96524d469d458dec8424da5cae12f9abc0f630 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Tue, 29 Oct 2024 06:26:33 +0000 Subject: [PATCH 27/37] unified the UT infrastructure api to support diffrent platform, passed TestCommon --- test/test_ops.py | 62 +++++++++---------- torch/testing/_internal/common_device_type.py | 12 ++-- .../_internal/common_methods_invocations.py | 27 ++++++-- torch/testing/_internal/common_utils.py | 27 ++++++++ torch/testing/_internal/opinfo/core.py | 3 +- .../_internal/opinfo/definitions/linalg.py | 55 ++++++++++++++-- .../_internal/opinfo/definitions/special.py | 4 ++ 7 files changed, 144 insertions(+), 46 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 658d19fe77ead..e121f39034c8c 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -27,16 +27,15 @@ from torch.testing._internal.common_device_type import ( any_common_cpu_device_one, deviceCountAtLeast, - has_gpu_device, instantiate_device_type_tests, onlyCPU, - onlyCUDA, - onlyCUDAAndXPU, + onlyGPU, onlyNativeDeviceTypesAnd, OpDTypes, ops, skipMeta, skipXPU, + is_gpu_device, ) from torch.testing._internal.common_dtype import ( all_types_and_complex_and, @@ -87,9 +86,9 @@ assert torch.get_default_dtype() == torch.float32 -if TEST_XPU: - apply_op_db_for(op_db, device="xpu") - apply_op_db_for(python_ref_db, device="xpu") +# if TEST_XPU: +# apply_op_db_for(op_db, device="xpu") +# apply_op_db_for(python_ref_db, device="xpu") # variant testing is only done with torch.float and torch.cfloat to avoid # excessive test times and maximize signal to noise ratio @@ -124,7 +123,7 @@ def reduction_dtype_filter(op): # Create a list of operators that are a subset of _ref_test_ops but don't have a -# numpy ref to compare them too, If both CPU and CUDA are 
compared to numpy +# numpy ref to compare them too, If both CPU and GPU are compared to numpy # then they do not need to be compared to each other _ops_and_refs_with_no_numpy_ref = [op for op in ops_and_refs if op.ref is None] @@ -155,22 +154,22 @@ def tearDownClass(cls): assert len(filtered_ops) == 0, err_msg - # Validates that each OpInfo works correctly on different CUDA devices - @onlyCUDAAndXPU + # Validates that each OpInfo works correctly on different GPU devices + @onlyGPU @deviceCountAtLeast(2) @ops(op_db, allowed_dtypes=(torch.float32, torch.long)) def test_multiple_devices(self, devices, dtype, op): - for cuda_device_str in devices: - cuda_device = torch.device(cuda_device_str) + for gpu_device_str in devices: + gpu_device = torch.device(gpu_device_str) # NOTE: only tests on first sample - samples = op.sample_inputs(cuda_device, dtype) + samples = op.sample_inputs(gpu_device, dtype) sample = first_sample(self, samples) result = op(sample.input, *sample.args, **sample.kwargs) if isinstance(result, torch.Tensor): - self.assertTrue(result.device == cuda_device) + self.assertTrue(result.device == gpu_device) elif is_iterable_of_tensors(result): - self.assertTrue(all(t.device == cuda_device for t in result)) + self.assertTrue(all(t.device == gpu_device for t in result)) else: self.skipTest( "Skipped! Only supports single tensor or iterable of tensor outputs." @@ -275,7 +274,7 @@ def test_numpy_ref(self, device, dtype, op): and op.formatted_name in ("signal_windows_exponential", "signal_windows_bartlett") and dtype == torch.float64 - and has_gpu_device(device) + and is_gpu_device(device) or "cpu" in device ): # noqa: E121 raise unittest.SkipTest("XXX: raises tensor-likes are not close.") @@ -288,7 +287,7 @@ def test_numpy_ref(self, device, dtype, op): ) # Tests that the cpu and gpu results are consistent - @onlyCUDAAndXPU + @onlyGPU @suppress_warnings @slowTest @ops(_ops_and_refs_with_no_numpy_ref, dtypes=any_common_cpu_device_one()) @@ -302,20 +301,20 @@ def to_cpu(arg): for sample in samples: cpu_sample = sample.transform(to_cpu) - cuda_results = op(sample.input, *sample.args, **sample.kwargs) + gpu_results = op(sample.input, *sample.args, **sample.kwargs) cpu_results = op(cpu_sample.input, *cpu_sample.args, **cpu_sample.kwargs) # output_process_fn_grad has a very unfortunate name # We use this function in linalg extensively to postprocess the inputs of functions # that are not completely well-defined. Think svd and muliplying the singular vectors by -1. - # CPU and CUDA implementations of the SVD can return valid SVDs that are different. + # CPU and GPU implementations of the SVD can return valid SVDs that are different. # We use this function to compare them. - cuda_results = sample.output_process_fn_grad(cuda_results) + gpu_results = sample.output_process_fn_grad(gpu_results) cpu_results = cpu_sample.output_process_fn_grad(cpu_results) # Lower tolerance because we are running this as a `@slowTest` # Don't want the periodic tests to fail frequently - self.assertEqual(cuda_results, cpu_results, atol=1e-3, rtol=1e-3) + self.assertEqual(gpu_results, cpu_results, atol=1e-3, rtol=1e-3) # Tests that experimental Python References can propagate shape, dtype, # and device metadata properly. 
@@ -545,7 +544,7 @@ def test_python_ref_torch_fallback(self, device, dtype, op): self._ref_test_helper(contextlib.nullcontext, device, dtype, op) @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") - @onlyCUDAAndXPU + @onlyGPU @ops(python_ref_db) @parametrize("executor", ["aten"]) @skipIfTorchInductor("Takes too long for inductor") @@ -791,13 +790,12 @@ def _extract_strides(out): return tuple(t.stride() for t in out) # Extracts data pointers from a tensor or iterable of tensors into a tuple - # NOTE: only extracts on the CPU and CUDA device types since some + # NOTE: only extracts on the CPU and GPU device types since some # device types don't have storage def _extract_data_ptrs(out): if ( self.device_type != "cpu" - and self.device_type != "cuda" - and self.device_type != "xpu" + and self.device_type in GPU_TYPES ): return () @@ -923,13 +921,12 @@ def _extract_strides(out): return tuple(t.stride() for t in out) # Extracts data pointers from a tensor or iterable of tensors into a tuple - # NOTE: only extracts on the CPU and CUDA device types since some + # NOTE: only extracts on the CPU and GPU device types since some # device types don't have storage def _extract_data_ptrs(out): if ( self.device_type != "cpu" - and self.device_type != "cuda" - and self.device_type != "xpu" + and self.device_type not in GPU_TYPES ): return () @@ -1005,8 +1002,8 @@ def _case_two_transform(t): wrong_device = None if torch.device(device).type != "cpu": wrong_device = "cpu" - elif torch.cuda.is_available(): - wrong_device = "cuda" + elif HAS_GPU: + wrong_device = GPU_TYPE factory_fn_msg = ( "\n\nNOTE: If your op is a factory function (i.e., it accepts TensorOptions) you should mark its " @@ -1389,7 +1386,7 @@ def convert_boolean_tensors(x): self.assertEqual(expect, actual) # Validates that each OpInfo specifies its forward and backward dtypes - # correctly for CPU and CUDA devices + # correctly for CPU and GPU devices @skipMeta @onlyNativeDeviceTypesAnd(["hpu"]) @ops(ops_and_refs, dtypes=OpDTypes.none) @@ -2361,6 +2358,7 @@ def test_refs_are_in_decomp_table(self, op): # TODO: investigate/fix fake_autocast_device_skips["cpu"] = {"linalg.pinv"} fake_autocast_device_skips["cuda"] = {"linalg.pinv", "pinverse"} +fake_autocast_device_skips["xpu"] = {"linalg.pinv", "pinverse"} dynamic_output_op_tests = ( @@ -2647,7 +2645,7 @@ def _test_fake_crossref_helper(self, device, dtype, op, context): except torch._subclasses.fake_tensor.UnsupportedOperatorException: pass - @onlyCUDA + @onlyGPU @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,)) @skipOps( "TestFakeTensor", "test_fake_crossref_backward_no_amp", fake_backward_xfails @@ -2655,7 +2653,7 @@ def _test_fake_crossref_helper(self, device, dtype, op, context): def test_fake_crossref_backward_no_amp(self, device, dtype, op): self._test_fake_crossref_helper(device, dtype, op, contextlib.nullcontext) - @onlyCUDA + @onlyGPU @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,)) @skipOps( "TestFakeTensor", diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index e4608c24f224c..8b44a404615dc 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -59,6 +59,8 @@ TEST_WITH_UBSAN, TEST_XPU, TestCase, + GPU_TYPE, + GPU_TYPES, ) @@ -1628,6 +1630,10 @@ def onlyHPU(fn): return onlyOn("hpu")(fn) +def onlyGPU(fn): + return onlyOn(GPU_TYPES)(fn) + + def onlyPRIVATEUSE1(fn): device_type = 
torch._C._get_privateuse1_backend_name() device_mod = getattr(torch, device_type, None) @@ -1649,8 +1655,7 @@ def only_fn(self, *args, **kwargs): return only_fn -def onlyCUDAAndXPU(fn): - return onlyOn(["cuda", "xpu"])(fn) + def disablecuDNN(fn): @@ -1983,6 +1988,5 @@ def any_common_cpu_device_one(): else OpDTypes.any_common_cpu_cuda_one ) - -def has_gpu_device(devices: List[str]): +def is_gpu_device(devices: List[str]): return "cuda" in devices or "xpu" in devices diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 7fb25ff9ed705..98255b0fb0c70 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -27,7 +27,7 @@ (onlyCPU, onlyCUDA, onlyNativeDeviceTypes, disablecuDNN, skipCUDAIfNoMagma, skipCUDAIfNoMagmaAndNoCusolver, skipCUDAIfNoCusolver, skipCPUIfNoLapack, skipCPUIfNoFFT, skipCUDAIf, precisionOverride, skipCPUIfNoMklSparse, - toleranceOverride, tol) + toleranceOverride, tol, skipXPU) from torch.testing._internal.common_cuda import ( PLATFORM_SUPPORTS_FLASH_ATTENTION, PLATFORM_SUPPORTS_MEM_EFF_ATTENTION, SM53OrLater, SM80OrLater, SM90OrLater, with_tf32_off, TEST_CUDNN, _get_torch_cuda_version, @@ -12112,6 +12112,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): supports_fwgrad_bwgrad=True, gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, sample_inputs_func=sample_inputs_addmm, + decorators=[skipXPU,], skips=( # Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479 DecorateInfo( @@ -12153,6 +12154,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): DecorateInfo( toleranceOverride({torch.half: tol(atol=1e-5, rtol=3e-3)}), 'TestInductorOpInfo', 'test_comprehensive', device_type='cpu'), + skipXPU, ], sample_inputs_func=sample_inputs_addmv), OpInfo('addbmm', @@ -12197,6 +12199,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'), # https://github.com/pytorch/pytorch/issues/55907 DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'), + DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_numpy_ref', device_type='xpu', dtypes=[torch.float64, torch.complex128,]), ), sample_inputs_func=sample_inputs_addbmm), OpInfo('baddbmm', @@ -13458,7 +13461,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): skipCUDAIf(not ((_get_torch_cuda_version() >= (11, 3)) or (_get_torch_rocm_version() >= (5, 2))), "cusparseSDDMM was added in 11.2.1"), - skipCPUIfNoMklSparse, ], + skipCPUIfNoMklSparse, skipXPU, ], skips=( # NotImplementedError: Tensors of type SparseCsrTensorImpl do not have is_contiguous DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples'), @@ -19984,6 +19987,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): supports_sparse_csc=True, check_batched_grad=False, check_batched_gradgrad=False, + decorators=[skipXPU,], skips=( # NotImplementedError: Could not run 'aten::normal_' with arguments from the 'SparseCPU' backend DecorateInfo(unittest.skip(""), 'TestCommon', 'test_noncontiguous_samples'), @@ -20635,7 +20639,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): # AssertionError: Tensor-likes are not close! 
# Fails in cuda11.7 # Error Log: https://github.com/pytorch/pytorch/actions/runs/3440108478/jobs/5738475757 - DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu', device_type='cuda'), + DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu', device_type=['cuda', 'xpu']), DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),),), # In training mode, feature_alpha_dropout currently doesn't support inputs of complex dtype # unlike when `train=False`, it supports complex inputs, hence 2 OpInfos to cover all cases @@ -21330,6 +21334,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): supports_gradgrad=False, skips=( DecorateInfo(unittest.skip("Unsupported on MPS for now"), 'TestCommon', 'test_numpy_ref_mps'), + DecorateInfo(unittest.skip("Skipped!"), None, None, device_type='xpu', dtypes=[torch.float64,]), ) ), OpInfo( @@ -22868,6 +22873,20 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): dtypes=(torch.float32,), device_type='cpu', ), + DecorateInfo( + unittest.skip("Skipped!"), + None, + None, + device_type='xpu', + dtypes=[torch.float64,], + ), + DecorateInfo( + unittest.skip("Skipped!"), + 'TestCommon', + 'test_dtypes', + device_type='xpu', + dtypes=None, + ), )), PythonRefInfo( "_refs.nn.functional.leaky_relu", @@ -24737,4 +24756,4 @@ def apply_op_db_for_xpu(op_db_list: List[OpInfo]): def apply_op_db_for(op_db_list: List[OpInfo], device='xpu'): if TEST_XPU and device == 'xpu': - apply_op_db_for_xpu(op_db_list) \ No newline at end of file + apply_op_db_for_xpu(op_db_list) diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 54ea45cbfa74b..672a215878690 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -5397,3 +5397,30 @@ def get_backend_ops(device='xpu'): except yaml.YAMLError or FileExistsError: print("Error in loading op_db.yaml.") return backend_ops + +GPU_TYPES = ["cuda", "xpu"] + +# defines here before import torch._dynamo is for avoiding circular import +# when get_gpu_type is imported from dynamo +@functools.lru_cache(None) +def get_gpu_type(): + avail_gpus = [x for x in GPU_TYPES if getattr(torch, x).is_available()] + assert len(avail_gpus) <= 1 + gpu_type = "cuda" if len(avail_gpus) == 0 else avail_gpus.pop() + return gpu_type + +HAS_CUDA = torch.cuda.is_available() + +HAS_XPU = torch.xpu.is_available() + +HAS_GPU = HAS_CUDA or HAS_XPU + +GPU_TYPE = get_gpu_type() + +HAS_MULTIGPU = any( + getattr(torch, gpu).is_available() and getattr(torch, gpu).device_count() >= 2 + for gpu in GPU_TYPES +) + +def get_gpu_autocast() + return torch.cuda.amp.autocast if HAS_CUDA else torch.xpu.amp.autocast diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index f1f590527aad6..6c91bb96646d0 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -107,7 +107,8 @@ def is_active(self, cls_name, test_name, device_type, dtype, param_kwargs): self.active_if and (self.cls_name is None or self.cls_name == cls_name) and (self.test_name is None or self.test_name == test_name) - and (self.device_type is None or self.device_type == device_type) + and (self.device_type is None or (self.device_type == device_type + if isinstance(self.device_type, str) else device_type in self.device_type)) and (self.dtypes is None or dtype in self.dtypes) # Support callables over kwargs to determine if the decorator is 
active. and ( diff --git a/torch/testing/_internal/opinfo/definitions/linalg.py b/torch/testing/_internal/opinfo/definitions/linalg.py index e94c6a6711443..51cac4000c786 100644 --- a/torch/testing/_internal/opinfo/definitions/linalg.py +++ b/torch/testing/_internal/opinfo/definitions/linalg.py @@ -28,6 +28,7 @@ skipCUDAIfRocm, tol, toleranceOverride, + skipXPU, ) from torch.testing._internal.common_dtype import ( all_types_and_complex, @@ -1430,7 +1431,7 @@ def make_input(): check_batched_gradgrad=False, supports_forward_ad=True, supports_fwgrad_bwgrad=True, - decorators=[skipCUDAIfNoMagma, skipCPUIfNoLapack], + decorators=[skipCUDAIfNoMagma, skipCPUIfNoLapack, ], skips=( DecorateInfo( unittest.skip("Skipped!"), @@ -1453,6 +1454,20 @@ def make_input(): device_type="mps", dtypes=[torch.float32], ), + DecorateInfo( + unittest.expectedFailure, + "TestCommon", + "test_types", + device_type='xpu', + dtypes=None, + ), + DecorateInfo( + unittest.expectedFailure, + None, + None, + device_type='xpu', + dtypes=[torch.complex, torch.float64, torch.complex64, torch.complex128, ], + ), ), ), OpInfo( @@ -1874,10 +1889,13 @@ def make_input(): supports_forward_ad=True, supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_linalg_lu, - decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack], + decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, ], + skips=( # linalg.lu_factor: LU without pivoting is not implemented on the CPU DecorateInfo(unittest.expectedFailure, "TestCommon", "test_compare_cpu"), + DecorateInfo(unittest.expectedFailure, "TestCommon", "test_types", device_type='xpu', dtypes=None), + DecorateInfo(unittest.expectedFailure, None, None, device_type='xpu', dtypes=[torch.complex, torch.float64, torch.complex64, torch.complex128, ]), ), ), OpInfo( @@ -1890,10 +1908,12 @@ def make_input(): supports_forward_ad=True, supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_linalg_lu, - decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack], + decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, ], skips=( # linalg.lu_factor: LU without pivoting is not implemented on the CPU DecorateInfo(unittest.expectedFailure, "TestCommon", "test_compare_cpu"), + DecorateInfo(unittest.expectedFailure, "TestCommon", "test_types", device_type='xpu', dtypes=None), + DecorateInfo(unittest.expectedFailure, None, None, device_type='xpu', dtypes=[torch.complex, torch.float64, torch.complex64, torch.complex128, ]), ), ), OpInfo( @@ -1907,10 +1927,12 @@ def make_input(): supports_forward_ad=True, supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_linalg_lu, - decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack], + decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, ], skips=( # linalg.lu_factor: LU without pivoting is not implemented on the CPU DecorateInfo(unittest.expectedFailure, "TestCommon", "test_compare_cpu"), + DecorateInfo(unittest.expectedFailure, "TestCommon", "test_types", device_type='xpu', dtypes=None), + DecorateInfo(unittest.expectedFailure, None, None, device_type='xpu', dtypes=[torch.complex, torch.float64, torch.complex64, torch.complex128, ]), ), ), OpInfo( @@ -2284,7 +2306,7 @@ def make_input(): check_batched_grad=False, check_batched_gradgrad=False, sample_inputs_func=sample_inputs_svd, - decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, with_tf32_off], + decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, with_tf32_off,], skips=( DecorateInfo( unittest.skip("Skipped!"), @@ -2323,6 +2345,13 @@ def 
make_input(): dtypes=[torch.float32], active_if=TEST_WITH_ROCM, ), + DecorateInfo( + unittest.skip("Skipped!"), + None, + None, + device_type="xpu", + dtypes=[torch.float64, torch.complex64, torch.complex128, ], + ), ), ), OpInfo( @@ -2373,6 +2402,13 @@ def make_input(): "TestCommon", "test_numpy_ref_mps", ), + DecorateInfo( + unittest.skip("Unsupported on MPS for now"), + "TestCommon", + "test_numpy_ref", + device_type="xpu", + dtypes=[torch.float64, torch.complex128,], + ), ), ), OpInfo( @@ -2471,6 +2507,15 @@ def make_input(): torch_opinfo_name="linalg.svd", supports_out=True, op_db=op_db, + skips=( + DecorateInfo( + unittest.skip("Skipped!"), + None, + None, + device_type="xpu", + dtypes=[torch.float64, torch.complex64, torch.complex128, ], + ), + ), ), PythonRefInfo( "_refs.linalg.svdvals", diff --git a/torch/testing/_internal/opinfo/definitions/special.py b/torch/testing/_internal/opinfo/definitions/special.py index f153deacaa99e..de49b845a2640 100644 --- a/torch/testing/_internal/opinfo/definitions/special.py +++ b/torch/testing/_internal/opinfo/definitions/special.py @@ -13,6 +13,7 @@ precisionOverride, tol, toleranceOverride, + skipXPU, ) from torch.testing._internal.common_dtype import all_types_and, floating_types from torch.testing._internal.common_utils import ( @@ -239,6 +240,7 @@ def sample_inputs_erfcx(op_info, device, dtype, requires_grad, **kwargs): promotes_int_to_float=True, supports_autograd=False, supports_one_python_scalar=True, + decorators=[skipXPU,], skips=( # Reference reference_inputs nans and infs on cuda and nan, inf, 0., -inf for cpu DecorateInfo(unittest.expectedFailure, "TestCommon", "test_compare_cpu"), @@ -461,6 +463,7 @@ def sample_inputs_erfcx(op_info, device, dtype, requires_grad, **kwargs): "special.hermite_polynomial_h", dtypes=all_types_and(torch.bool), promotes_int_to_float=True, + decorators=[skipXPU,], skips=( DecorateInfo(unittest.skip("Skipped!"), "TestCudaFuserOpInfo"), DecorateInfo(unittest.skip("Skipped!"), "TestNNCOpInfo"), @@ -834,6 +837,7 @@ def sample_inputs_erfcx(op_info, device, dtype, requires_grad, **kwargs): torch_opinfo_name="special.zeta", supports_one_python_scalar=True, op_db=op_db, + decorators = [skipXPU,], skips=( # Reference reference_inputs nans and infs on cuda and nan, inf, 0., -inf for cpu DecorateInfo(unittest.expectedFailure, "TestCommon", "test_compare_cpu"), From 4e2d28c971feba8d85936e317207ecc8f6443571 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Fri, 15 Nov 2024 14:18:45 +0000 Subject: [PATCH 28/37] fix GPU_TYPES import --- test/test_ops.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index e121f39034c8c..ca9a12b8d19a9 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -36,6 +36,7 @@ skipMeta, skipXPU, is_gpu_device, + GPU_TYPES, ) from torch.testing._internal.common_dtype import ( all_types_and_complex_and, @@ -1388,6 +1389,7 @@ def convert_boolean_tensors(x): # Validates that each OpInfo specifies its forward and backward dtypes # correctly for CPU and GPU devices @skipMeta + @skipXPU @onlyNativeDeviceTypesAnd(["hpu"]) @ops(ops_and_refs, dtypes=OpDTypes.none) def test_dtypes(self, device, op): @@ -1978,6 +1980,7 @@ def clone_and_perform_view(input, **kwargs): self.assertEqual(tensor.grad, cloned1_tensor.grad) @ops(ops_and_refs, allowed_dtypes=(torch.cfloat,)) + @skipXPU def test_conj_view(self, device, dtype, op): if not op.test_conjugated_samples: self.skipTest("Operation doesn't support conjugated inputs.") @@ -2019,6 +2022,7 @@ 
def test_neg_view(self, device, dtype, op): ) @ops(ops_and_refs, allowed_dtypes=(torch.cdouble,)) + @skipXPU def test_neg_conj_view(self, device, dtype, op): if not op.test_neg_view: self.skipTest("Operation not tested with tensors with negative bit.") @@ -2673,12 +2677,12 @@ def test_strided_layout(self, device, dtype, op): self.assertEqual(strided_result.layout, torch.strided) -instantiate_device_type_tests(TestCommon, globals(), allow_xpu=True) -instantiate_device_type_tests(TestCompositeCompliance, globals(), allow_xpu=True) -instantiate_device_type_tests(TestMathBits, globals(), allow_xpu=True) +instantiate_device_type_tests(TestCommon, globals()) +instantiate_device_type_tests(TestCompositeCompliance, globals()) +instantiate_device_type_tests(TestMathBits, globals()) instantiate_device_type_tests(TestRefsOpsInfo, globals(), only_for="cpu") -instantiate_device_type_tests(TestFakeTensor, globals(), allow_xpu=True) -instantiate_device_type_tests(TestTags, globals(), allow_xpu=True) +instantiate_device_type_tests(TestFakeTensor, globals()) +instantiate_device_type_tests(TestTags, globals()) if __name__ == "__main__": TestCase._default_dtype_check_enabled = True From af203f2ee53a8a62ab1dc4922e89eea3327eccb9 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Fri, 15 Nov 2024 14:24:09 +0000 Subject: [PATCH 29/37] fix GPU_TYPES import --- test/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_ops.py b/test/test_ops.py index ca9a12b8d19a9..33c87ac24d155 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -36,7 +36,6 @@ skipMeta, skipXPU, is_gpu_device, - GPU_TYPES, ) from torch.testing._internal.common_dtype import ( all_types_and_complex_and, @@ -80,6 +79,7 @@ TEST_XPU, TestCase, unMarkDynamoStrictTest, + GPU_TYPES, ) from torch.utils._python_dispatch import TorchDispatchMode from torch.utils._pytree import tree_map From a7759087ffec563907eac2be9d6718083ad19994 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Mon, 18 Nov 2024 01:55:53 +0000 Subject: [PATCH 30/37] fix typo --- torch/testing/_internal/common_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index d81845e280511..b285c3a8e44e4 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -5523,6 +5523,6 @@ def get_gpu_type(): for gpu in GPU_TYPES ) -def get_gpu_autocast() +def get_gpu_autocast(): return torch.cuda.amp.autocast if HAS_CUDA else torch.xpu.amp.autocast From 9c591c660311cd6f3f8f9dba06730de7227befce Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Mon, 18 Nov 2024 05:50:48 +0000 Subject: [PATCH 31/37] remove xpu backend specific code in --- test/test_ops.py | 5 -- test/xpu/op_db.yaml | 72 ------------------- .../_internal/common_methods_invocations.py | 40 +---------- .../_internal/opinfo/definitions/linalg.py | 55 ++------------ 4 files changed, 8 insertions(+), 164 deletions(-) delete mode 100644 test/xpu/op_db.yaml diff --git a/test/test_ops.py b/test/test_ops.py index 8e9de589e1417..ee99834f61184 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -43,7 +43,6 @@ integral_types_and, ) from torch.testing._internal.common_methods_invocations import ( - apply_op_db_for, BinaryUfuncInfo, op_db, ops_and_refs, @@ -87,10 +86,6 @@ assert torch.get_default_dtype() == torch.float32 -# if TEST_XPU: -# apply_op_db_for(op_db, device="xpu") -# apply_op_db_for(python_ref_db, device="xpu") - # variant testing is only done with 
torch.float and torch.cfloat to avoid # excessive test times and maximize signal to noise ratio _variant_ops = partial( diff --git a/test/xpu/op_db.yaml b/test/xpu/op_db.yaml deleted file mode 100644 index 144513cdc7f5a..0000000000000 --- a/test/xpu/op_db.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Owner(s): ["module: intel"] -# Define the supported Aten ops in XPU backend, the dtypes are aligned with other GPUs. -supported_ops: - - fill - - zeros - - zeros_like - - clone - - view_as_real - - view_as_complex - - view - - resize_ - - resize_as_ - - add - - sub - - abs - - bitwise_and - - bitwise_not - - bitwise_or - - bitwise_xor - - clamp - - clamp_max - - clamp_min - - clone - - copy - - cumsum - - empty - - eq - - fill - - fmod - - gcd - - ge - - gelu - - gt - - index_add - - index_put - - index_select - - isnan - - le - - lt - - masked_fill - - maximum - - minimum - - native_dropout_backward - - ne - - neg - - nn.functional.adaptive_avg_pool2d - - nn.functional.threshold - - nonzero - - normal - - reciprocal - - rsub - - relu - - remainder - - reshape - - unfold - - uniform - - view - - zero - - add - - any - - arange - - as_strided - - flip - - tril - - triu - - cat - - log_softmax - - softmax - - scatter - - gather - - max_pool2d_with_indices_backward - - nn.functional.unfold diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index a683f3283410b..87aa9a4f949f2 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -27,7 +27,7 @@ (onlyCPU, onlyCUDA, onlyNativeDeviceTypes, disablecuDNN, skipCUDAIfNoMagma, skipCUDAIfNoMagmaAndNoCusolver, skipCUDAIfNoCusolver, skipCPUIfNoLapack, skipCPUIfNoFFT, skipCUDAIf, precisionOverride, skipCPUIfNoMklSparse, - toleranceOverride, tol, skipXPU) + toleranceOverride, tol) from torch.testing._internal.common_cuda import ( PLATFORM_SUPPORTS_FLASH_ATTENTION, PLATFORM_SUPPORTS_MEM_EFF_ATTENTION, SM53OrLater, SM80OrLater, SM90OrLater, with_tf32_off, TEST_CUDNN, _get_torch_cuda_version, @@ -12173,7 +12173,6 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): supports_fwgrad_bwgrad=True, gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, sample_inputs_func=sample_inputs_addmm, - decorators=[skipXPU,], skips=( # Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479 DecorateInfo( @@ -12215,7 +12214,6 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): DecorateInfo( toleranceOverride({torch.half: tol(atol=1e-5, rtol=3e-3)}), 'TestInductorOpInfo', 'test_comprehensive', device_type='cpu'), - skipXPU, ], sample_inputs_func=sample_inputs_addmv), OpInfo('addbmm', @@ -13522,7 +13520,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): skipCUDAIf(not ((_get_torch_cuda_version() >= (11, 3)) or (_get_torch_rocm_version() >= (5, 2))), "cusparseSDDMM was added in 11.2.1"), - skipCPUIfNoMklSparse, skipXPU, ], + skipCPUIfNoMklSparse, ], skips=( # NotImplementedError: Tensors of type SparseCsrTensorImpl do not have is_contiguous DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples'), @@ -20034,7 +20032,6 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): supports_sparse_csc=True, check_batched_grad=False, check_batched_gradgrad=False, - decorators=[skipXPU,], skips=( # NotImplementedError: Could not run 'aten::normal_' with arguments from the 'SparseCPU' backend 
DecorateInfo(unittest.skip(""), 'TestCommon', 'test_noncontiguous_samples'), @@ -21378,7 +21375,6 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): supports_gradgrad=False, skips=( DecorateInfo(unittest.skip("Unsupported on MPS for now"), 'TestCommon', 'test_numpy_ref_mps'), - DecorateInfo(unittest.skip("Skipped!"), None, None, device_type='xpu', dtypes=[torch.float64,]), ) ), OpInfo( @@ -22924,20 +22920,6 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): dtypes=(torch.float32,), device_type='cpu', ), - DecorateInfo( - unittest.skip("Skipped!"), - None, - None, - device_type='xpu', - dtypes=[torch.float64,], - ), - DecorateInfo( - unittest.skip("Skipped!"), - 'TestCommon', - 'test_dtypes', - device_type='xpu', - dtypes=None, - ), )), PythonRefInfo( "_refs.nn.functional.leaky_relu", @@ -24791,20 +24773,4 @@ def skipOps(test_case_name, base_test_name, to_skip): # This decorator doesn't modify fn in any way def wrapped(fn): return fn - return wrapped - -def apply_op_db_for_xpu(op_db_list: List[OpInfo]): - # Get the supported op from yaml file. - supported_op_list = get_backend_ops(device='xpu')['supported_ops'] - - for op in op_db_list: - # For refs ops get the name of the related torch_opinfo. - torch_opinfo = op.torch_opinfo if hasattr(op, "torch_opinfo") else None - name = torch_opinfo.name if torch_opinfo is not None else op.name - if name not in supported_op_list: - # Update op_db, add unittest.skip decorators to skip the op for the backend. - op.decorators = (*op.decorators, DecorateInfo(unittest.skip, device_type='xpu', dtypes=None)) - -def apply_op_db_for(op_db_list: List[OpInfo], device='xpu'): - if TEST_XPU and device == 'xpu': - apply_op_db_for_xpu(op_db_list) + return wrapped \ No newline at end of file diff --git a/torch/testing/_internal/opinfo/definitions/linalg.py b/torch/testing/_internal/opinfo/definitions/linalg.py index 51cac4000c786..e94c6a6711443 100644 --- a/torch/testing/_internal/opinfo/definitions/linalg.py +++ b/torch/testing/_internal/opinfo/definitions/linalg.py @@ -28,7 +28,6 @@ skipCUDAIfRocm, tol, toleranceOverride, - skipXPU, ) from torch.testing._internal.common_dtype import ( all_types_and_complex, @@ -1431,7 +1430,7 @@ def make_input(): check_batched_gradgrad=False, supports_forward_ad=True, supports_fwgrad_bwgrad=True, - decorators=[skipCUDAIfNoMagma, skipCPUIfNoLapack, ], + decorators=[skipCUDAIfNoMagma, skipCPUIfNoLapack], skips=( DecorateInfo( unittest.skip("Skipped!"), @@ -1454,20 +1453,6 @@ def make_input(): device_type="mps", dtypes=[torch.float32], ), - DecorateInfo( - unittest.expectedFailure, - "TestCommon", - "test_types", - device_type='xpu', - dtypes=None, - ), - DecorateInfo( - unittest.expectedFailure, - None, - None, - device_type='xpu', - dtypes=[torch.complex, torch.float64, torch.complex64, torch.complex128, ], - ), ), ), OpInfo( @@ -1889,13 +1874,10 @@ def make_input(): supports_forward_ad=True, supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_linalg_lu, - decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, ], - + decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack], skips=( # linalg.lu_factor: LU without pivoting is not implemented on the CPU DecorateInfo(unittest.expectedFailure, "TestCommon", "test_compare_cpu"), - DecorateInfo(unittest.expectedFailure, "TestCommon", "test_types", device_type='xpu', dtypes=None), - DecorateInfo(unittest.expectedFailure, None, None, device_type='xpu', dtypes=[torch.complex, torch.float64, torch.complex64, 
torch.complex128, ]), ), ), OpInfo( @@ -1908,12 +1890,10 @@ def make_input(): supports_forward_ad=True, supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_linalg_lu, - decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, ], + decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack], skips=( # linalg.lu_factor: LU without pivoting is not implemented on the CPU DecorateInfo(unittest.expectedFailure, "TestCommon", "test_compare_cpu"), - DecorateInfo(unittest.expectedFailure, "TestCommon", "test_types", device_type='xpu', dtypes=None), - DecorateInfo(unittest.expectedFailure, None, None, device_type='xpu', dtypes=[torch.complex, torch.float64, torch.complex64, torch.complex128, ]), ), ), OpInfo( @@ -1927,12 +1907,10 @@ def make_input(): supports_forward_ad=True, supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_linalg_lu, - decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, ], + decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack], skips=( # linalg.lu_factor: LU without pivoting is not implemented on the CPU DecorateInfo(unittest.expectedFailure, "TestCommon", "test_compare_cpu"), - DecorateInfo(unittest.expectedFailure, "TestCommon", "test_types", device_type='xpu', dtypes=None), - DecorateInfo(unittest.expectedFailure, None, None, device_type='xpu', dtypes=[torch.complex, torch.float64, torch.complex64, torch.complex128, ]), ), ), OpInfo( @@ -2306,7 +2284,7 @@ def make_input(): check_batched_grad=False, check_batched_gradgrad=False, sample_inputs_func=sample_inputs_svd, - decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, with_tf32_off,], + decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, with_tf32_off], skips=( DecorateInfo( unittest.skip("Skipped!"), @@ -2345,13 +2323,6 @@ def make_input(): dtypes=[torch.float32], active_if=TEST_WITH_ROCM, ), - DecorateInfo( - unittest.skip("Skipped!"), - None, - None, - device_type="xpu", - dtypes=[torch.float64, torch.complex64, torch.complex128, ], - ), ), ), OpInfo( @@ -2402,13 +2373,6 @@ def make_input(): "TestCommon", "test_numpy_ref_mps", ), - DecorateInfo( - unittest.skip("Unsupported on MPS for now"), - "TestCommon", - "test_numpy_ref", - device_type="xpu", - dtypes=[torch.float64, torch.complex128,], - ), ), ), OpInfo( @@ -2507,15 +2471,6 @@ def make_input(): torch_opinfo_name="linalg.svd", supports_out=True, op_db=op_db, - skips=( - DecorateInfo( - unittest.skip("Skipped!"), - None, - None, - device_type="xpu", - dtypes=[torch.float64, torch.complex64, torch.complex128, ], - ), - ), ), PythonRefInfo( "_refs.linalg.svdvals", From af351f031c78097e63e995b8c2113225cb3cd528 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Mon, 18 Nov 2024 05:57:18 +0000 Subject: [PATCH 32/37] further remove xpu backend specific skips --- torch/testing/_internal/common_methods_invocations.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 87aa9a4f949f2..d6a83a259bdee 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -38,7 +38,7 @@ TEST_WITH_ROCM, IS_FBCODE, IS_WINDOWS, IS_MACOS, TEST_SCIPY, torch_to_numpy_dtype_dict, numpy_to_torch_dtype, TEST_WITH_ASAN, GRADCHECK_NONDET_TOL, slowTest, TEST_WITH_SLOW, - TEST_WITH_TORCHINDUCTOR, TEST_XPU, get_backend_ops, + TEST_WITH_TORCHINDUCTOR, ) from torch.testing._utils import wrapper_set_seed @@ -12258,7 +12258,6 @@ def 
sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'), # https://github.com/pytorch/pytorch/issues/55907 DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'), - DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_numpy_ref', device_type='xpu', dtypes=[torch.float64, torch.complex128,]), ), sample_inputs_func=sample_inputs_addbmm), OpInfo('baddbmm', @@ -20685,7 +20684,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): # AssertionError: Tensor-likes are not close! # Fails in cuda11.7 # Error Log: https://github.com/pytorch/pytorch/actions/runs/3440108478/jobs/5738475757 - DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu', device_type=['cuda', 'xpu']), + DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu', device_type=['cuda']), DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),),), # In training mode, feature_alpha_dropout currently doesn't support inputs of complex dtype # unlike when `train=False`, it supports complex inputs, hence 2 OpInfos to cover all cases From 8381e1845da4cd2d12dfe810f2967c7f3e5e4e13 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Mon, 18 Nov 2024 05:59:56 +0000 Subject: [PATCH 33/37] further remove xpu backend specific skips --- torch/testing/_internal/common_methods_invocations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index d6a83a259bdee..d751091f421dc 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -20684,7 +20684,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): # AssertionError: Tensor-likes are not close! 
# Fails in cuda11.7 # Error Log: https://github.com/pytorch/pytorch/actions/runs/3440108478/jobs/5738475757 - DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu', device_type=['cuda']), + DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu', device_type='cuda'), DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),),), # In training mode, feature_alpha_dropout currently doesn't support inputs of complex dtype # unlike when `train=False`, it supports complex inputs, hence 2 OpInfos to cover all cases From 5f5d50fd8be399f7f17a42119aca1417deb66d8c Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Mon, 18 Nov 2024 06:02:25 +0000 Subject: [PATCH 34/37] further remove xpu backend specific skips --- torch/testing/_internal/common_methods_invocations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index d751091f421dc..846c539305aed 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -38,7 +38,7 @@ TEST_WITH_ROCM, IS_FBCODE, IS_WINDOWS, IS_MACOS, TEST_SCIPY, torch_to_numpy_dtype_dict, numpy_to_torch_dtype, TEST_WITH_ASAN, GRADCHECK_NONDET_TOL, slowTest, TEST_WITH_SLOW, - TEST_WITH_TORCHINDUCTOR, + TEST_WITH_TORCHINDUCTOR ) from torch.testing._utils import wrapper_set_seed From bd2f0b8863fd2ce3b7a62afa7a5d6c0af4ec7fa8 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Mon, 18 Nov 2024 06:24:59 +0000 Subject: [PATCH 35/37] remove yaml dependency --- torch/testing/_internal/common_methods_invocations.py | 2 +- torch/testing/_internal/common_utils.py | 1 - torch/testing/_internal/opinfo/definitions/special.py | 4 ---- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 846c539305aed..1b88cb99fc1fb 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -38,7 +38,7 @@ TEST_WITH_ROCM, IS_FBCODE, IS_WINDOWS, IS_MACOS, TEST_SCIPY, torch_to_numpy_dtype_dict, numpy_to_torch_dtype, TEST_WITH_ASAN, GRADCHECK_NONDET_TOL, slowTest, TEST_WITH_SLOW, - TEST_WITH_TORCHINDUCTOR + TEST_WITH_TORCHINDUCTOR ) from torch.testing._utils import wrapper_set_seed diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index b285c3a8e44e4..c5165a05d1793 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -36,7 +36,6 @@ import types import unittest import warnings -import yaml from collections.abc import Mapping, Sequence from contextlib import closing, contextmanager from copy import deepcopy diff --git a/torch/testing/_internal/opinfo/definitions/special.py b/torch/testing/_internal/opinfo/definitions/special.py index de49b845a2640..f153deacaa99e 100644 --- a/torch/testing/_internal/opinfo/definitions/special.py +++ b/torch/testing/_internal/opinfo/definitions/special.py @@ -13,7 +13,6 @@ precisionOverride, tol, toleranceOverride, - skipXPU, ) from torch.testing._internal.common_dtype import all_types_and, floating_types from torch.testing._internal.common_utils import ( @@ -240,7 +239,6 @@ def sample_inputs_erfcx(op_info, device, dtype, requires_grad, **kwargs): promotes_int_to_float=True, supports_autograd=False, supports_one_python_scalar=True, - 
decorators=[skipXPU,], skips=( # Reference reference_inputs nans and infs on cuda and nan, inf, 0., -inf for cpu DecorateInfo(unittest.expectedFailure, "TestCommon", "test_compare_cpu"), @@ -463,7 +461,6 @@ def sample_inputs_erfcx(op_info, device, dtype, requires_grad, **kwargs): "special.hermite_polynomial_h", dtypes=all_types_and(torch.bool), promotes_int_to_float=True, - decorators=[skipXPU,], skips=( DecorateInfo(unittest.skip("Skipped!"), "TestCudaFuserOpInfo"), DecorateInfo(unittest.skip("Skipped!"), "TestNNCOpInfo"), @@ -837,7 +834,6 @@ def sample_inputs_erfcx(op_info, device, dtype, requires_grad, **kwargs): torch_opinfo_name="special.zeta", supports_one_python_scalar=True, op_db=op_db, - decorators = [skipXPU,], skips=( # Reference reference_inputs nans and infs on cuda and nan, inf, 0., -inf for cpu DecorateInfo(unittest.expectedFailure, "TestCommon", "test_compare_cpu"), From 6f513cbee126eb18422ed7e3f137c16d5dd39be2 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Mon, 18 Nov 2024 13:04:00 +0000 Subject: [PATCH 36/37] fix HAS_GPU import issue --- test/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_ops.py b/test/test_ops.py index ee99834f61184..14ade931a6833 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -75,10 +75,10 @@ TEST_WITH_TORCHDYNAMO, TEST_WITH_TORCHINDUCTOR, TEST_WITH_UBSAN, - TEST_XPU, TestCase, unMarkDynamoStrictTest, GPU_TYPES, + HAS_GPU, ) from torch.utils._python_dispatch import TorchDispatchMode from torch.utils._pytree import tree_map From b29b5934d3fd9477f7f1cc2203fda8c39782a8b6 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Thu, 21 Nov 2024 12:33:33 +0000 Subject: [PATCH 37/37] remove unused function get_backend_ops as design changes, return torch.autocast in get_gpu_autocast --- test/test_ops.py | 3 ++- torch/testing/_internal/common_utils.py | 15 +-------------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index a28db437ba72f..104adc2641ab5 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -79,6 +79,7 @@ unMarkDynamoStrictTest, GPU_TYPES, HAS_GPU, + get_gpu_autocast, ) from torch.utils._python_dispatch import TorchDispatchMode from torch.utils._pytree import tree_map @@ -2840,7 +2841,7 @@ def test_fake_crossref_backward_no_amp(self, device, dtype, op): fake_backward_xfails | fake_autocast_backward_xfails, ) def test_fake_crossref_backward_amp(self, device, dtype, op): - self._test_fake_crossref_helper(device, dtype, op, torch.cuda.amp.autocast) + self._test_fake_crossref_helper(device, dtype, op, get_gpu_autocast()) @ops([op for op in ops_and_refs if op.is_factory_function]) def test_strided_layout(self, device, dtype, op): diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 3e2dc60e1efa7..e592ffcf86720 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -5532,21 +5532,8 @@ def load_inline(*args, **kwargs): return wrapper -def get_backend_ops(device='xpu'): - backend_ops = {} - if TEST_XPU and device == 'xpu': - xpu_op_db = CI_TEST_PREFIX + "/" + device + "/op_db.yaml" - try: - with open(xpu_op_db) as stream: - backend_ops = yaml.safe_load(stream) - except yaml.YAMLError or FileExistsError: - print("Error in loading op_db.yaml.") - return backend_ops - GPU_TYPES = ["cuda", "xpu"] -# defines here before import torch._dynamo is for avoiding circular import -# when get_gpu_type is imported from dynamo @functools.lru_cache(None) def get_gpu_type(): 
avail_gpus = [x for x in GPU_TYPES if getattr(torch, x).is_available()] @@ -5568,5 +5555,5 @@ def get_gpu_type(): ) def get_gpu_autocast(): - return torch.cuda.amp.autocast if HAS_CUDA else torch.xpu.amp.autocast + return torch.cuda.amp.autocast if HAS_CUDA else torch.autocast
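
Usage sketch (illustrative only; not part of the patches above): a minimal device-generic test module written against the helpers this series adds or extends in common_device_type.py, namely onlyGPU, skipXPU, any_common_cpu_device_one and the allow_xpu flag of instantiate_device_type_tests. The class name TestMyOps, the test names and the tolerance values are invented for the example; the decorators, imports and comparison pattern follow the ones used in the diffs.

import torch
from torch.testing._internal.common_device_type import (
    any_common_cpu_device_one,
    instantiate_device_type_tests,
    onlyGPU,
    ops,
    skipXPU,
)
from torch.testing._internal.common_methods_invocations import op_db
from torch.testing._internal.common_utils import run_tests, TestCase


class TestMyOps(TestCase):
    # Runs on any GPU backend (cuda or xpu) and picks a single dtype that is
    # supported on both the CPU and the active GPU backend.
    @onlyGPU
    @ops(op_db, dtypes=any_common_cpu_device_one())
    def test_gpu_matches_cpu(self, device, dtype, op):
        sample = next(iter(op.sample_inputs(device, dtype)))
        gpu_result = op(sample.input, *sample.args, **sample.kwargs)
        cpu_sample = sample.transform(
            lambda t: t.cpu() if isinstance(t, torch.Tensor) else t
        )
        cpu_result = op(cpu_sample.input, *cpu_sample.args, **cpu_sample.kwargs)
        self.assertEqual(gpu_result, cpu_result, atol=1e-3, rtol=1e-3)

    # Still runs on cpu/cuda but is skipped on xpu until op coverage lands there.
    @skipXPU
    @ops(op_db, allowed_dtypes=(torch.float32,))
    def test_not_ready_on_xpu(self, device, dtype, op):
        sample = next(iter(op.sample_inputs(device, dtype)))
        op(sample.input, *sample.args, **sample.kwargs)


# allow_xpu=True lets the XPU test base be instantiated alongside cpu/cuda.
instantiate_device_type_tests(TestMyOps, globals(), allow_xpu=True)

if __name__ == "__main__":
    run_tests()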
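
A second sketch, also illustrative, of the GPU-dispatch helpers the later patches add to common_utils.py (GPU_TYPE, HAS_GPU, get_gpu_autocast). The function name run_autocast_matmul and the matmul workload are invented for the example; the device_type handling reflects that, per the final patch, get_gpu_autocast() returns torch.cuda.amp.autocast on CUDA builds and the generic torch.autocast otherwise, which needs an explicit device_type when entered.

import torch
from torch.testing._internal.common_utils import (
    GPU_TYPE,        # "cuda" or "xpu" depending on the available backend (defaults to "cuda")
    HAS_GPU,
    get_gpu_autocast,
)


def run_autocast_matmul():
    # Minimal device-agnostic workload: no cuda/xpu branching in the body.
    if not HAS_GPU:
        return None
    device = torch.device(GPU_TYPE)
    a = torch.randn(8, 8, device=device)
    b = torch.randn(8, 8, device=device)
    autocast = get_gpu_autocast()
    # torch.cuda.amp.autocast takes no device argument; the generic
    # torch.autocast returned for other backends requires device_type.
    ctx = autocast() if GPU_TYPE == "cuda" else autocast(device_type=GPU_TYPE)
    with ctx:
        return a @ b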