From a6f84558c7b607fa1dc8b0cc29fe7d240e32865e Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Tue, 2 Jun 2020 16:11:59 +1200 Subject: [PATCH 1/4] Speed up tests --- tests/python-gpu/test_gpu_linear.py | 9 ++++----- tests/python/regression_test_utilities.py | 18 +++++++++++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/python-gpu/test_gpu_linear.py b/tests/python-gpu/test_gpu_linear.py index a0726f0794f7..499929637d98 100644 --- a/tests/python-gpu/test_gpu_linear.py +++ b/tests/python-gpu/test_gpu_linear.py @@ -3,12 +3,11 @@ import unittest sys.path.append('tests/python/') -import test_linear # noqa: E402 -import testing as tm # noqa: E402 +import test_linear # noqa: E402 +import testing as tm # noqa: E402 class TestGPULinear(unittest.TestCase): - datasets = ["Boston", "Digits", "Cancer", "Sparse regression"] common_param = { 'booster': ['gblinear'], @@ -16,7 +15,7 @@ class TestGPULinear(unittest.TestCase): 'eta': [0.5], 'top_k': [10], 'tolerance': [1e-5], - 'alpha': [.005, .1], + 'alpha': [.1], 'lambda': [0.005], 'coordinate_selection': ['cyclic', 'random', 'greedy']} @@ -26,6 +25,6 @@ def test_gpu_coordinate(self): parameters['gpu_id'] = [0] for param in test_linear.parameter_combinations(parameters): results = test_linear.run_suite( - param, 150, self.datasets, scale_features=True) + param, 100, self.datasets, scale_features=True) test_linear.assert_regression_result(results, 1e-2) test_linear.assert_classification_result(results) diff --git a/tests/python/regression_test_utilities.py b/tests/python/regression_test_utilities.py index 1b1b095dea4a..82681a17c725 100644 --- a/tests/python/regression_test_utilities.py +++ b/tests/python/regression_test_utilities.py @@ -59,7 +59,11 @@ def get_sparse(): n = 5000 sparsity = 0.75 X, y = datasets.make_regression(n, random_state=rng) - X = np.array([[0.0 if rng.uniform(0, 1) < sparsity else x for x in x_row] for x_row in X]) + flag = np.random.binomial(1, sparsity, X.shape) + for i in range(X.shape[0]): + for j in range(X.shape[1]): + if flag[i, j]: + X[i, j] = 0.0 from scipy import sparse X = sparse.csr_matrix(X) return X, y @@ -78,8 +82,11 @@ def get_weights_regression(min_weight, max_weight): n = 10000 sparsity = 0.25 X, y = datasets.make_regression(n, random_state=rng) - X = np.array([[np.nan if rng.uniform(0, 1) < sparsity else x - for x in x_row] for x_row in X]) + flag = np.random.binomial(1, sparsity, X.shape) + for i in range(X.shape[0]): + for j in range(X.shape[1]): + if flag[i, j]: + X[i, j] = np.nan w = np.array([rng.uniform(min_weight, max_weight) for i in range(n)]) return X, y, w @@ -101,7 +108,7 @@ def train_dataset(dataset, param_in, num_rounds=10, scale_features=False, DMatri np.savetxt('tmptmp_1234.csv', np.hstack((dataset.y.reshape(len(dataset.y), 1), X)), delimiter=',') dtrain = DMatrixT('tmptmp_1234.csv?format=csv&label_column=0#tmptmp_', - weight=dataset.w) + weight=dataset.w) elif DMatrixT is xgb.DeviceQuantileDMatrix: import cupy as cp dtrain = DMatrixT(cp.array(X), dataset.y, weight=dataset.w, **dmatrix_params) @@ -146,7 +153,8 @@ def parameter_combinations(variable_param): def run_suite(param, num_rounds=10, select_datasets=None, scale_features=False, DMatrixT=xgb.DMatrix, dmatrix_params={}): """ - Run the given parameters on a range of datasets. Objective and eval metric will be automatically set + Run the given parameters on a range of datasets. 
Objective and eval metric will be + automatically set """ datasets = [ Dataset("Boston", get_boston, "reg:squarederror", "rmse"), From cfcc20843653cc341176f9bb067b7db0218b26b6 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 3 Jun 2020 12:20:30 +1200 Subject: [PATCH 2/4] Prevent DeviceQuantileDMatrix initialisation with numpy --- python-package/xgboost/data.py | 4 ---- .../python-gpu/test_device_quantile_dmatrix.py | 17 +++++++++++++++++ tests/python/regression_test_utilities.py | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 tests/python-gpu/test_device_quantile_dmatrix.py diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 2a8669d105ba..b90410e216aa 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -566,10 +566,6 @@ def handle_input(self, data, feature_names, feature_types): __device_quantile_dmatrix_registry.register_handler( 'cupy.core.core', 'ndarray', DeviceQuantileCudaArrayInterfaceHandler) -__device_quantile_dmatrix_registry.register_handler_opaque( - lambda x: hasattr(x, '__array__'), NumpyHandler) -__device_quantile_dmatrix_registry.register_handler_opaque( - lambda x: hasattr(x, '__cuda_array_interface__'), NumpyHandler) class DeviceQuantileCudaColumnarHandler(DeviceQuantileDMatrixDataHandler, diff --git a/tests/python-gpu/test_device_quantile_dmatrix.py b/tests/python-gpu/test_device_quantile_dmatrix.py new file mode 100644 index 000000000000..05919c91d34d --- /dev/null +++ b/tests/python-gpu/test_device_quantile_dmatrix.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +import numpy as np +import xgboost as xgb +import unittest +import pytest +import cupy as cp + + +class TestDeviceQuantileDMatrix(unittest.TestCase): + def test_dmatrix_numpy_init(self): + data = np.random.randn(5, 5) + with pytest.raises(AssertionError, match='is not supported for DeviceQuantileDMatrix'): + dm = xgb.DeviceQuantileDMatrix(data, np.ones(5, dtype=np.float64)) + + def test_dmatrix_cupy_init(self): + data = cp.random.randn(5, 5) + dm = xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64)) diff --git a/tests/python/regression_test_utilities.py b/tests/python/regression_test_utilities.py index 82681a17c725..19f366fb1fc2 100644 --- a/tests/python/regression_test_utilities.py +++ b/tests/python/regression_test_utilities.py @@ -111,7 +111,7 @@ def train_dataset(dataset, param_in, num_rounds=10, scale_features=False, DMatri weight=dataset.w) elif DMatrixT is xgb.DeviceQuantileDMatrix: import cupy as cp - dtrain = DMatrixT(cp.array(X), dataset.y, weight=dataset.w, **dmatrix_params) + dtrain = DMatrixT(cp.array(X), cp.array(dataset.y), weight=None if dataset.w is None else cp.array(dataset.w), **dmatrix_params) else: dtrain = DMatrixT(X, dataset.y, weight=dataset.w, **dmatrix_params) From a5722a13bf47697f1fe67906aaa008df8e2c3d9c Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 3 Jun 2020 13:33:36 +1200 Subject: [PATCH 3/4] Use joblib.memory --- tests/python/regression_test_utilities.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/python/regression_test_utilities.py b/tests/python/regression_test_utilities.py index 19f366fb1fc2..3257a4f4846f 100644 --- a/tests/python/regression_test_utilities.py +++ b/tests/python/regression_test_utilities.py @@ -4,6 +4,8 @@ import os import sys import xgboost as xgb +from joblib import Memory +memory = Memory('./cachedir', verbose=0) try: from sklearn import datasets @@ -39,24 +41,28 @@ def __repr__(self): return 
self.__str__() +@memory.cache def get_boston(): data = datasets.load_boston() return data.data, data.target +@memory.cache def get_digits(): data = datasets.load_digits() return data.data, data.target +@memory.cache def get_cancer(): data = datasets.load_breast_cancer() return data.data, data.target +@memory.cache def get_sparse(): rng = np.random.RandomState(199) - n = 5000 + n = 2000 sparsity = 0.75 X, y = datasets.make_regression(n, random_state=rng) flag = np.random.binomial(1, sparsity, X.shape) @@ -77,17 +83,18 @@ def get_small_weights(): return get_weights_regression(1e-6, 1e-5) +@memory.cache def get_weights_regression(min_weight, max_weight): - rng = np.random.RandomState(199) - n = 10000 + np.random.seed(199) + n = 2000 sparsity = 0.25 - X, y = datasets.make_regression(n, random_state=rng) + X, y = datasets.make_regression(n, random_state=199) flag = np.random.binomial(1, sparsity, X.shape) for i in range(X.shape[0]): for j in range(X.shape[1]): if flag[i, j]: X[i, j] = np.nan - w = np.array([rng.uniform(min_weight, max_weight) for i in range(n)]) + w = np.random.uniform(min_weight, max_weight, n) return X, y, w @@ -111,7 +118,9 @@ def train_dataset(dataset, param_in, num_rounds=10, scale_features=False, DMatri weight=dataset.w) elif DMatrixT is xgb.DeviceQuantileDMatrix: import cupy as cp - dtrain = DMatrixT(cp.array(X), cp.array(dataset.y), weight=None if dataset.w is None else cp.array(dataset.w), **dmatrix_params) + dtrain = DMatrixT(cp.array(X), cp.array(dataset.y), + weight=None if dataset.w is None else cp.array(dataset.w), + **dmatrix_params) else: dtrain = DMatrixT(X, dataset.y, weight=dataset.w, **dmatrix_params) From df87dc15f5d106d84e86664413126652eaa06f18 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 3 Jun 2020 15:20:22 +1200 Subject: [PATCH 4/4] Use RandomState --- tests/python-gpu/test_device_quantile_dmatrix.py | 7 ++++++- tests/python-gpu/test_gpu_updaters.py | 1 + tests/python/regression_test_utilities.py | 10 +++++----- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/python-gpu/test_device_quantile_dmatrix.py b/tests/python-gpu/test_device_quantile_dmatrix.py index 05919c91d34d..8b8bf85954f8 100644 --- a/tests/python-gpu/test_device_quantile_dmatrix.py +++ b/tests/python-gpu/test_device_quantile_dmatrix.py @@ -3,7 +3,10 @@ import xgboost as xgb import unittest import pytest -import cupy as cp +import sys + +sys.path.append("tests/python") +import testing as tm class TestDeviceQuantileDMatrix(unittest.TestCase): @@ -12,6 +15,8 @@ def test_dmatrix_numpy_init(self): with pytest.raises(AssertionError, match='is not supported for DeviceQuantileDMatrix'): dm = xgb.DeviceQuantileDMatrix(data, np.ones(5, dtype=np.float64)) + @pytest.mark.skipif(**tm.no_cupy()) def test_dmatrix_cupy_init(self): + import cupy as cp data = cp.random.randn(5, 5) dm = xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64)) diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index b8612444d569..755e3bb43f8f 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -47,6 +47,7 @@ def test_gpu_hist_device_dmatrix(self): device_dmatrix_datasets = ["Boston", "Cancer", "Digits"] for param in test_param: param['tree_method'] = 'gpu_hist' + gpu_results_device_dmatrix = run_suite(param, select_datasets=device_dmatrix_datasets, DMatrixT=xgb.DeviceQuantileDMatrix, dmatrix_params={'max_bin': param['max_bin']}) diff --git a/tests/python/regression_test_utilities.py 
b/tests/python/regression_test_utilities.py index 3257a4f4846f..b2b44000761d 100644 --- a/tests/python/regression_test_utilities.py +++ b/tests/python/regression_test_utilities.py @@ -65,7 +65,7 @@ def get_sparse(): n = 2000 sparsity = 0.75 X, y = datasets.make_regression(n, random_state=rng) - flag = np.random.binomial(1, sparsity, X.shape) + flag = rng.binomial(1, sparsity, X.shape) for i in range(X.shape[0]): for j in range(X.shape[1]): if flag[i, j]: @@ -85,16 +85,16 @@ def get_small_weights(): @memory.cache def get_weights_regression(min_weight, max_weight): - np.random.seed(199) + rng = np.random.RandomState(199) n = 2000 sparsity = 0.25 - X, y = datasets.make_regression(n, random_state=199) - flag = np.random.binomial(1, sparsity, X.shape) + X, y = datasets.make_regression(n, random_state=rng) + flag = rng.binomial(1, sparsity, X.shape) for i in range(X.shape[0]): for j in range(X.shape[1]): if flag[i, j]: X[i, j] = np.nan - w = np.random.uniform(min_weight, max_weight, n) + w = rng.uniform(min_weight, max_weight, n) return X, y, w
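
Taken together, patches 3 and 4 settle on one pattern for the test utilities: wrap each expensive dataset generator in a joblib Memory cache and draw every random quantity from a single seeded numpy RandomState, so the cached arrays are reproducible and independent of global NumPy state. Below is a self-contained sketch of that pattern, not part of the patches themselves; it assumes joblib and scikit-learn are installed, the file name is hypothetical, and the explicit nested loop from the patch is replaced by an equivalent vectorised NaN mask.

# sketch_cached_dataset.py -- illustrative only, mirrors the caching/RNG pattern above
import numpy as np
from joblib import Memory
from sklearn import datasets

memory = Memory('./cachedir', verbose=0)  # same cache location as the patch


@memory.cache
def get_weights_regression(min_weight, max_weight):
    # A single seeded RandomState keeps the cached result reproducible
    # and isolated from np.random's global state.
    rng = np.random.RandomState(199)
    n = 2000
    sparsity = 0.25
    X, y = datasets.make_regression(n, random_state=rng)
    # Mark roughly `sparsity` of the entries as missing
    # (vectorised form of the element-wise loop used in the patch).
    X[rng.binomial(1, sparsity, X.shape).astype(bool)] = np.nan
    w = rng.uniform(min_weight, max_weight, n)
    return X, y, w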