Skip to content

Commit e726dd9

Browse files
authored
Set device in device dmatrix. (#5596)
1 parent ef26bc4 commit e726dd9

File tree

8 files changed

+41
-5
lines changed

8 files changed

+41
-5
lines changed

include/xgboost/data.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ class MetaInfo {
102102
/*!
103103
* \brief Validate all metainfo.
104104
*/
105-
void Validate() const;
105+
void Validate(int32_t device) const;
106106

107107
MetaInfo Slice(common::Span<int32_t const> ridxs) const;
108108
/*!

src/data/data.cc

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t
338338
}
339339
}
340340

341-
void MetaInfo::Validate() const {
341+
void MetaInfo::Validate(int32_t device) const {
342342
if (group_ptr_.size() != 0 && weights_.Size() != 0) {
343343
CHECK_EQ(group_ptr_.size(), weights_.Size() + 1)
344344
<< "Size of weights must equal to number of groups when ranking "
@@ -350,30 +350,44 @@ void MetaInfo::Validate() const {
350350
<< "Invalid group structure. Number of rows obtained from groups "
351351
"doesn't equal to actual number of rows given by data.";
352352
}
353+
auto check_device = [device](HostDeviceVector<float> const &v) {
354+
CHECK(v.DeviceIdx() == GenericParameter::kCpuId ||
355+
device == GenericParameter::kCpuId ||
356+
v.DeviceIdx() == device)
357+
<< "Data is resided on a different device than `gpu_id`. "
358+
<< "Device that data is on: " << v.DeviceIdx() << ", "
359+
<< "`gpu_id` for XGBoost: " << device;
360+
};
361+
353362
if (weights_.Size() != 0) {
354363
CHECK_EQ(weights_.Size(), num_row_)
355364
<< "Size of weights must equal to number of rows.";
365+
check_device(weights_);
356366
return;
357367
}
358368
if (labels_.Size() != 0) {
359369
CHECK_EQ(labels_.Size(), num_row_)
360370
<< "Size of labels must equal to number of rows.";
371+
check_device(labels_);
361372
return;
362373
}
363374
if (labels_lower_bound_.Size() != 0) {
364375
CHECK_EQ(labels_lower_bound_.Size(), num_row_)
365376
<< "Size of label_lower_bound must equal to number of rows.";
377+
check_device(labels_lower_bound_);
366378
return;
367379
}
368380
if (labels_upper_bound_.Size() != 0) {
369381
CHECK_EQ(labels_upper_bound_.Size(), num_row_)
370382
<< "Size of label_upper_bound must equal to number of rows.";
383+
check_device(labels_upper_bound_);
371384
return;
372385
}
373386
CHECK_LE(num_nonzero_, num_col_ * num_row_);
374387
if (base_margin_.Size() != 0) {
375388
CHECK_EQ(base_margin_.Size() % num_row_, 0)
376389
<< "Size of base margin must be a multiple of number of rows.";
390+
check_device(base_margin_);
377391
}
378392
}
379393

src/data/device_dmatrix.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ template <typename AdapterT>
201201
DeviceDMatrix::DeviceDMatrix(AdapterT* adapter, float missing, int nthread, int max_bin) {
202202
common::HistogramCuts cuts =
203203
common::AdapterDeviceSketch(adapter, max_bin, missing);
204+
dh::safe_cuda(cudaSetDevice(adapter->DeviceIdx()));
204205
auto& batch = adapter->Value();
205206
// Work out how many valid entries we have in each row
206207
dh::caching_device_vector<size_t> row_counts(adapter->NumRows() + 1, 0);

src/data/simple_dmatrix.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ void CopyDataRowMajor(AdapterT* adapter, common::Span<Entry> data,
9999
// be supported in future. Does not currently support inferring row/column size
100100
template <typename AdapterT>
101101
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
102+
dh::safe_cuda(cudaSetDevice(adapter->DeviceIdx()));
102103
CHECK(adapter->NumRows() != kAdapterUnknownSize);
103104
CHECK(adapter->NumColumns() != kAdapterUnknownSize);
104105

src/learner.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1052,7 +1052,7 @@ class LearnerImpl : public LearnerIO {
10521052

10531053
void ValidateDMatrix(DMatrix* p_fmat) const {
10541054
MetaInfo const& info = p_fmat->Info();
1055-
info.Validate();
1055+
info.Validate(generic_parameters_.gpu_id);
10561056

10571057
auto const row_based_split = [this]() {
10581058
return tparam_.dsplit == DataSplitMode::kRow ||

tests/cpp/data/test_metainfo.cc

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,17 @@ TEST(MetaInfo, Validate) {
149149
info.num_col_ = 3;
150150
std::vector<xgboost::bst_group_t> groups (11);
151151
info.SetInfo("group", groups.data(), xgboost::DataType::kUInt32, 11);
152-
EXPECT_THROW(info.Validate(), dmlc::Error);
152+
EXPECT_THROW(info.Validate(0), dmlc::Error);
153153

154154
std::vector<float> labels(info.num_row_ + 1);
155155
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
156-
EXPECT_THROW(info.Validate(), dmlc::Error);
156+
EXPECT_THROW(info.Validate(0), dmlc::Error);
157+
158+
#if defined(XGBOOST_USE_CUDA)
159+
info.group_ptr_.clear();
160+
labels.resize(info.num_row_);
161+
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
162+
info.labels_.SetDevice(0);
163+
EXPECT_THROW(info.Validate(1), dmlc::Error);
164+
#endif // defined(XGBOOST_USE_CUDA)
157165
}

tests/python-gpu/test_from_cupy.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,14 @@ def test_dlpack_device_dmat(self):
136136
n = 100
137137
X = cp.random.random((n, 2))
138138
xgb.DeviceQuantileDMatrix(X.toDlpack())
139+
140+
@pytest.mark.skipif(**tm.no_cupy())
141+
@pytest.mark.mgpu
142+
def test_specified_device(self):
143+
import cupy as cp
144+
cp.cuda.runtime.setDevice(0)
145+
dtrain = dmatrix_from_cupy(
146+
np.float32, xgb.DeviceQuantileDMatrix, np.nan)
147+
with pytest.raises(xgb.core.XGBoostError):
148+
xgb.train({'tree_method': 'gpu_hist', 'gpu_id': 1},
149+
dtrain, num_boost_round=10)

tests/python-gpu/test_gpu_prediction.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ def test_sklearn(self):
121121
@pytest.mark.skipif(**tm.no_cupy())
122122
def test_inplace_predict_cupy(self):
123123
import cupy as cp
124+
cp.cuda.runtime.setDevice(0)
124125
rows = 1000
125126
cols = 10
126127
cp_rng = cp.random.RandomState(1994)

0 commit comments

Comments
 (0)