From a6f84558c7b607fa1dc8b0cc29fe7d240e32865e Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Tue, 2 Jun 2020 16:11:59 +1200 Subject: [PATCH 1/4] Speed up tests --- tests/python-gpu/test_gpu_linear.py | 9 ++++----- tests/python/regression_test_utilities.py | 18 +++++++++++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/python-gpu/test_gpu_linear.py b/tests/python-gpu/test_gpu_linear.py index a0726f0794f7..499929637d98 100644 --- a/tests/python-gpu/test_gpu_linear.py +++ b/tests/python-gpu/test_gpu_linear.py @@ -3,12 +3,11 @@ import unittest sys.path.append('tests/python/') -import test_linear # noqa: E402 -import testing as tm # noqa: E402 +import test_linear # noqa: E402 +import testing as tm # noqa: E402 class TestGPULinear(unittest.TestCase): - datasets = ["Boston", "Digits", "Cancer", "Sparse regression"] common_param = { 'booster': ['gblinear'], @@ -16,7 +15,7 @@ class TestGPULinear(unittest.TestCase): 'eta': [0.5], 'top_k': [10], 'tolerance': [1e-5], - 'alpha': [.005, .1], + 'alpha': [.1], 'lambda': [0.005], 'coordinate_selection': ['cyclic', 'random', 'greedy']} @@ -26,6 +25,6 @@ def test_gpu_coordinate(self): parameters['gpu_id'] = [0] for param in test_linear.parameter_combinations(parameters): results = test_linear.run_suite( - param, 150, self.datasets, scale_features=True) + param, 100, self.datasets, scale_features=True) test_linear.assert_regression_result(results, 1e-2) test_linear.assert_classification_result(results) diff --git a/tests/python/regression_test_utilities.py b/tests/python/regression_test_utilities.py index 1b1b095dea4a..82681a17c725 100644 --- a/tests/python/regression_test_utilities.py +++ b/tests/python/regression_test_utilities.py @@ -59,7 +59,11 @@ def get_sparse(): n = 5000 sparsity = 0.75 X, y = datasets.make_regression(n, random_state=rng) - X = np.array([[0.0 if rng.uniform(0, 1) < sparsity else x for x in x_row] for x_row in X]) + flag = np.random.binomial(1, sparsity, X.shape) + for i in range(X.shape[0]): + for j in range(X.shape[1]): + if flag[i, j]: + X[i, j] = 0.0 from scipy import sparse X = sparse.csr_matrix(X) return X, y @@ -78,8 +82,11 @@ def get_weights_regression(min_weight, max_weight): n = 10000 sparsity = 0.25 X, y = datasets.make_regression(n, random_state=rng) - X = np.array([[np.nan if rng.uniform(0, 1) < sparsity else x - for x in x_row] for x_row in X]) + flag = np.random.binomial(1, sparsity, X.shape) + for i in range(X.shape[0]): + for j in range(X.shape[1]): + if flag[i, j]: + X[i, j] = np.nan w = np.array([rng.uniform(min_weight, max_weight) for i in range(n)]) return X, y, w @@ -101,7 +108,7 @@ def train_dataset(dataset, param_in, num_rounds=10, scale_features=False, DMatri np.savetxt('tmptmp_1234.csv', np.hstack((dataset.y.reshape(len(dataset.y), 1), X)), delimiter=',') dtrain = DMatrixT('tmptmp_1234.csv?format=csv&label_column=0#tmptmp_', - weight=dataset.w) + weight=dataset.w) elif DMatrixT is xgb.DeviceQuantileDMatrix: import cupy as cp dtrain = DMatrixT(cp.array(X), dataset.y, weight=dataset.w, **dmatrix_params) @@ -146,7 +153,8 @@ def parameter_combinations(variable_param): def run_suite(param, num_rounds=10, select_datasets=None, scale_features=False, DMatrixT=xgb.DMatrix, dmatrix_params={}): """ - Run the given parameters on a range of datasets. Objective and eval metric will be automatically set + Run the given parameters on a range of datasets. 
Objective and eval metric will be + automatically set """ datasets = [ Dataset("Boston", get_boston, "reg:squarederror", "rmse"), From cfcc20843653cc341176f9bb067b7db0218b26b6 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 3 Jun 2020 12:20:30 +1200 Subject: [PATCH 2/4] Prevent DeviceQuantileDMatrix initialisation with numpy --- python-package/xgboost/data.py | 4 ---- .../python-gpu/test_device_quantile_dmatrix.py | 17 +++++++++++++++++ tests/python/regression_test_utilities.py | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 tests/python-gpu/test_device_quantile_dmatrix.py diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 2a8669d105ba..b90410e216aa 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -566,10 +566,6 @@ def handle_input(self, data, feature_names, feature_types): __device_quantile_dmatrix_registry.register_handler( 'cupy.core.core', 'ndarray', DeviceQuantileCudaArrayInterfaceHandler) -__device_quantile_dmatrix_registry.register_handler_opaque( - lambda x: hasattr(x, '__array__'), NumpyHandler) -__device_quantile_dmatrix_registry.register_handler_opaque( - lambda x: hasattr(x, '__cuda_array_interface__'), NumpyHandler) class DeviceQuantileCudaColumnarHandler(DeviceQuantileDMatrixDataHandler, diff --git a/tests/python-gpu/test_device_quantile_dmatrix.py b/tests/python-gpu/test_device_quantile_dmatrix.py new file mode 100644 index 000000000000..05919c91d34d --- /dev/null +++ b/tests/python-gpu/test_device_quantile_dmatrix.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +import numpy as np +import xgboost as xgb +import unittest +import pytest +import cupy as cp + + +class TestDeviceQuantileDMatrix(unittest.TestCase): + def test_dmatrix_numpy_init(self): + data = np.random.randn(5, 5) + with pytest.raises(AssertionError, match='is not supported for DeviceQuantileDMatrix'): + dm = xgb.DeviceQuantileDMatrix(data, np.ones(5, dtype=np.float64)) + + def test_dmatrix_cupy_init(self): + data = cp.random.randn(5, 5) + dm = xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64)) diff --git a/tests/python/regression_test_utilities.py b/tests/python/regression_test_utilities.py index 82681a17c725..19f366fb1fc2 100644 --- a/tests/python/regression_test_utilities.py +++ b/tests/python/regression_test_utilities.py @@ -111,7 +111,7 @@ def train_dataset(dataset, param_in, num_rounds=10, scale_features=False, DMatri weight=dataset.w) elif DMatrixT is xgb.DeviceQuantileDMatrix: import cupy as cp - dtrain = DMatrixT(cp.array(X), dataset.y, weight=dataset.w, **dmatrix_params) + dtrain = DMatrixT(cp.array(X), cp.array(dataset.y), weight=None if dataset.w is None else cp.array(dataset.w), **dmatrix_params) else: dtrain = DMatrixT(X, dataset.y, weight=dataset.w, **dmatrix_params) From a5722a13bf47697f1fe67906aaa008df8e2c3d9c Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 3 Jun 2020 13:33:36 +1200 Subject: [PATCH 3/4] Use joblib.memory --- tests/python/regression_test_utilities.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/python/regression_test_utilities.py b/tests/python/regression_test_utilities.py index 19f366fb1fc2..3257a4f4846f 100644 --- a/tests/python/regression_test_utilities.py +++ b/tests/python/regression_test_utilities.py @@ -4,6 +4,8 @@ import os import sys import xgboost as xgb +from joblib import Memory +memory = Memory('./cachedir', verbose=0) try: from sklearn import datasets @@ -39,24 +41,28 @@ def __repr__(self): return 
self.__str__() +@memory.cache def get_boston(): data = datasets.load_boston() return data.data, data.target +@memory.cache def get_digits(): data = datasets.load_digits() return data.data, data.target +@memory.cache def get_cancer(): data = datasets.load_breast_cancer() return data.data, data.target +@memory.cache def get_sparse(): rng = np.random.RandomState(199) - n = 5000 + n = 2000 sparsity = 0.75 X, y = datasets.make_regression(n, random_state=rng) flag = np.random.binomial(1, sparsity, X.shape) @@ -77,17 +83,18 @@ def get_small_weights(): return get_weights_regression(1e-6, 1e-5) +@memory.cache def get_weights_regression(min_weight, max_weight): - rng = np.random.RandomState(199) - n = 10000 + np.random.seed(199) + n = 2000 sparsity = 0.25 - X, y = datasets.make_regression(n, random_state=rng) + X, y = datasets.make_regression(n, random_state=199) flag = np.random.binomial(1, sparsity, X.shape) for i in range(X.shape[0]): for j in range(X.shape[1]): if flag[i, j]: X[i, j] = np.nan - w = np.array([rng.uniform(min_weight, max_weight) for i in range(n)]) + w = np.random.uniform(min_weight, max_weight, n) return X, y, w @@ -111,7 +118,9 @@ def train_dataset(dataset, param_in, num_rounds=10, scale_features=False, DMatri weight=dataset.w) elif DMatrixT is xgb.DeviceQuantileDMatrix: import cupy as cp - dtrain = DMatrixT(cp.array(X), cp.array(dataset.y), weight=None if dataset.w is None else cp.array(dataset.w), **dmatrix_params) + dtrain = DMatrixT(cp.array(X), cp.array(dataset.y), + weight=None if dataset.w is None else cp.array(dataset.w), + **dmatrix_params) else: dtrain = DMatrixT(X, dataset.y, weight=dataset.w, **dmatrix_params) From df87dc15f5d106d84e86664413126652eaa06f18 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 3 Jun 2020 15:20:22 +1200 Subject: [PATCH 4/4] Use RandomState --- tests/python-gpu/test_device_quantile_dmatrix.py | 7 ++++++- tests/python-gpu/test_gpu_updaters.py | 1 + tests/python/regression_test_utilities.py | 10 +++++----- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/python-gpu/test_device_quantile_dmatrix.py b/tests/python-gpu/test_device_quantile_dmatrix.py index 05919c91d34d..8b8bf85954f8 100644 --- a/tests/python-gpu/test_device_quantile_dmatrix.py +++ b/tests/python-gpu/test_device_quantile_dmatrix.py @@ -3,7 +3,10 @@ import xgboost as xgb import unittest import pytest -import cupy as cp +import sys + +sys.path.append("tests/python") +import testing as tm class TestDeviceQuantileDMatrix(unittest.TestCase): @@ -12,6 +15,8 @@ def test_dmatrix_numpy_init(self): with pytest.raises(AssertionError, match='is not supported for DeviceQuantileDMatrix'): dm = xgb.DeviceQuantileDMatrix(data, np.ones(5, dtype=np.float64)) + @pytest.mark.skipif(**tm.no_cupy()) def test_dmatrix_cupy_init(self): + import cupy as cp data = cp.random.randn(5, 5) dm = xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64)) diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index b8612444d569..755e3bb43f8f 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -47,6 +47,7 @@ def test_gpu_hist_device_dmatrix(self): device_dmatrix_datasets = ["Boston", "Cancer", "Digits"] for param in test_param: param['tree_method'] = 'gpu_hist' + gpu_results_device_dmatrix = run_suite(param, select_datasets=device_dmatrix_datasets, DMatrixT=xgb.DeviceQuantileDMatrix, dmatrix_params={'max_bin': param['max_bin']}) diff --git a/tests/python/regression_test_utilities.py 
b/tests/python/regression_test_utilities.py index 3257a4f4846f..b2b44000761d 100644 --- a/tests/python/regression_test_utilities.py +++ b/tests/python/regression_test_utilities.py @@ -65,7 +65,7 @@ def get_sparse(): n = 2000 sparsity = 0.75 X, y = datasets.make_regression(n, random_state=rng) - flag = np.random.binomial(1, sparsity, X.shape) + flag = rng.binomial(1, sparsity, X.shape) for i in range(X.shape[0]): for j in range(X.shape[1]): if flag[i, j]: @@ -85,16 +85,16 @@ def get_small_weights(): @memory.cache def get_weights_regression(min_weight, max_weight): - np.random.seed(199) + rng = np.random.RandomState(199) n = 2000 sparsity = 0.25 - X, y = datasets.make_regression(n, random_state=199) - flag = np.random.binomial(1, sparsity, X.shape) + X, y = datasets.make_regression(n, random_state=rng) + flag = rng.binomial(1, sparsity, X.shape) for i in range(X.shape[0]): for j in range(X.shape[1]): if flag[i, j]: X[i, j] = np.nan - w = np.random.uniform(min_weight, max_weight, n) + w = rng.uniform(min_weight, max_weight, n) return X, y, w
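
Taken together, patches 3 and 4 settle on one pattern for the test utilities: wrap each expensive dataset generator in a joblib Memory cache and draw every random quantity from a single seeded numpy RandomState, so the cached arrays are reproducible and independent of global NumPy state. Below is a self-contained sketch of that pattern, not part of the patches themselves; it assumes joblib and scikit-learn are installed, the file name is hypothetical, and the explicit nested loop from the patch is replaced by an equivalent vectorised NaN mask.

# sketch_cached_dataset.py -- illustrative only, mirrors the caching/RNG pattern above
import numpy as np
from joblib import Memory
from sklearn import datasets

memory = Memory('./cachedir', verbose=0)  # same cache location as the patch


@memory.cache
def get_weights_regression(min_weight, max_weight):
    # A single seeded RandomState keeps the cached result reproducible
    # and isolated from np.random's global state.
    rng = np.random.RandomState(199)
    n = 2000
    sparsity = 0.25
    X, y = datasets.make_regression(n, random_state=rng)
    # Mark roughly `sparsity` of the entries as missing
    # (vectorised form of the element-wise loop used in the patch).
    X[rng.binomial(1, sparsity, X.shape).astype(bool)] = np.nan
    w = rng.uniform(min_weight, max_weight, n)
    return X, y, w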