From c768395313bd5530afd0d46a918b4c315e7df00c Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Thu, 16 Apr 2020 20:17:16 +0300 Subject: [PATCH 01/13] distributed --- src/tree/updater_quantile_hist.cc | 43 +++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 84dc625ae1ed..39aec36a4b6b 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -308,7 +308,46 @@ void QuantileHistMaker::Builder::SplitSiblings(const std::vector& n for (auto const& entry : nodes) { int nid = entry.nid; RegTree::Node &node = (*p_tree)[nid]; - if (rabit::IsDistributed()) { + + + +// size_t node_sizes[2] = { row_set_collection_[left_id ].Size(), +// row_set_collection_[right_id].Size() }; +// +// if (rabit::IsDistributed()) { +// // compute amount of samples in each dtree node accross all distributed workers +// rabit::Allreduce(&node_sizes[0], 2); +// } + + +if(node.IsRoot()) +{ + small_siblings->push_back(entry); +} +else +{ + const int32_t left_id = (*p_tree)[node.Parent()].LeftChild(); + const int32_t right_id = (*p_tree)[node.Parent()].RightChild(); + size_t node_sizes[2] = { row_set_collection_[left_id ].Size(), + row_set_collection_[right_id].Size() }; + if (rabit::IsDistributed()) { + // compute amount of samples in each dtree node accross all distributed workers + rabit::Allreduce(&node_sizes[0], 2); + } + if(nid == left_id && node_sizes[0] < node_sizes[1]) + { + small_siblings->push_back(entry); + } + else if(nid == right_id && node_sizes[1] <= node_sizes[0]) + { + small_siblings->push_back(entry); + } + else + { + big_siblings->push_back(entry); + } +} +/* if (rabit::IsDistributed()) { if (node.IsRoot() || node.IsLeftChild()) { small_siblings->push_back(entry); } else { @@ -328,7 +367,7 @@ void QuantileHistMaker::Builder::SplitSiblings(const std::vector& n } else { big_siblings->push_back(entry); } - } + }*/ } } From 977a4029c44aed8fd5e1bf8b47a09e7318ebf8a2 Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Fri, 17 Apr 2020 13:11:47 +0300 Subject: [PATCH 02/13] distributed --- src/tree/updater_quantile_hist.cc | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 39aec36a4b6b..43374882e5fd 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -118,17 +118,37 @@ void QuantileHistMaker::Builder::SyncHistograms( auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; auto sibling_hist = hist_[entry.sibling_nid]; - SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); +// SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); + SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); } }); if (isDistributed) { + + this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count); + common::BlockedSpace2d space2(nodes_for_subtraction_trick_.size(), [&](size_t node) { + return nbins; + }, 1024); + + common::ParallelFor2d(space2, this->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = nodes_for_subtraction_trick_[node]; + auto this_hist = hist_[entry.nid]; + + if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; + auto sibling_hist = hist_[entry.sibling_nid]; + + SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); + } + }); + + // use Subtraction Trick - for (auto const& node : nodes_for_subtraction_trick_) { +/* for (auto const& node : nodes_for_subtraction_trick_) { SubtractionTrick(hist_[node.nid], hist_[node.sibling_nid], hist_[(*p_tree)[node.nid].Parent()]); - } + }*/ } builder_monitor_.Stop("SyncHistograms"); From 80f164944553e9a4728cb24fc80ded76d6836d2e Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Fri, 17 Apr 2020 13:48:25 +0300 Subject: [PATCH 03/13] distributed --- src/tree/updater_quantile_hist.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 43374882e5fd..f7a8f8df13c8 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -117,9 +117,9 @@ void QuantileHistMaker::Builder::SyncHistograms( if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1 && !isDistributed) { auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; auto sibling_hist = hist_[entry.sibling_nid]; - -// SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); - SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); +//std::cout << "\n---------------------------SubtractionHist---------------------------\n"; + SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); + //SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); } }); From 53d41ae976f3e348aaf43dac59ae699244b3c3a8 Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Fri, 17 Apr 2020 18:09:20 +0300 Subject: [PATCH 04/13] distributed --- src/tree/updater_quantile_hist.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index f7a8f8df13c8..09b5abbc9c2a 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -127,6 +127,7 @@ void QuantileHistMaker::Builder::SyncHistograms( this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count); + common::BlockedSpace2d space2(nodes_for_subtraction_trick_.size(), [&](size_t node) { return nbins; }, 1024); @@ -325,6 +326,7 @@ void QuantileHistMaker::Builder::SplitSiblings(const std::vector& n std::vector* small_siblings, std::vector* big_siblings, RegTree *p_tree) { + builder_monitor_.Start("SplitSiblings"); for (auto const& entry : nodes) { int nid = entry.nid; RegTree::Node &node = (*p_tree)[nid]; @@ -389,6 +391,7 @@ else } }*/ } + builder_monitor_.Stop("SplitSiblings"); } void QuantileHistMaker::Builder::ExpandWithDepthWise( From f5002abd0b5b7573924ff6dc9519b41ac5a95dab Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Mon, 20 Apr 2020 21:56:52 +0300 Subject: [PATCH 05/13] distributed lossguide --- src/tree/updater_quantile_hist.cc | 45 ++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 09b5abbc9c2a..920fdc52a17d 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -326,7 +326,35 @@ void QuantileHistMaker::Builder::SplitSiblings(const std::vector& n std::vector* small_siblings, std::vector* big_siblings, RegTree *p_tree) { + builder_monitor_.Start("SplitSiblings"); + + const size_t size = nodes.size(); + //std::cout << "-------------------------size: " << size << "-------------------------\n"; + std::vector node_sizes1(size * 2); + //size_t* node_sizes1 = new size_t[2]; + //size_t node_sizes1[/*size * */256*2]; + size_t ii = 0; + for (size_t i = 0; i < nodes.size(); i++) { + int nid = nodes[i].nid; + RegTree::Node &node = (*p_tree)[nid]; +if(!node.IsRoot()) +{ + const int32_t left_id = (*p_tree)[node.Parent()].LeftChild(); + const int32_t right_id = (*p_tree)[node.Parent()].RightChild(); + node_sizes1[ii] = row_set_collection_[left_id ].Size(); + node_sizes1[ii + 1] = row_set_collection_[right_id].Size(); + //node_sizes[ii] = row_set_collection_[left_id ].Size(); + //node_sizes[ii + 1] = row_set_collection_[right_id ].Size(); + ii += 2; +} + } + if(rabit::IsDistributed()) + { + rabit::Allreduce(&node_sizes1[0], size * 2); + } +//CHECK_EQ(size*2,i); + for (auto const& entry : nodes) { int nid = entry.nid; RegTree::Node &node = (*p_tree)[nid]; @@ -486,15 +514,18 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide( ExpandEntry right_node(cright, cleft, p_tree->GetDepth(cright), 0.0f, timestamp++); + size_t node_sizes[2] = { row_set_collection_[cleft].Size(), + row_set_collection_[cright].Size() }; if (rabit::IsDistributed()) { - // in distributed mode, we need to keep consistent across workers + rabit::Allreduce(&node_sizes[0], 2); + } + if(node_sizes[0] < node_sizes[1]) + { BuildHistogramsLossGuide(left_node, gmat, gmatb, p_tree, gpair_h); - } else { - if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) { - BuildHistogramsLossGuide(left_node, gmat, gmatb, p_tree, gpair_h); - } else { - BuildHistogramsLossGuide(right_node, gmat, gmatb, p_tree, gpair_h); - } + } + else + { + BuildHistogramsLossGuide(right_node, gmat, gmatb, p_tree, gpair_h); } this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree); From 2e17f8a1b55979c74bf54b49b1f5c61a89bfacb1 Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Thu, 23 Apr 2020 18:39:52 +0300 Subject: [PATCH 06/13] extra allreduce was deleted --- src/tree/updater_quantile_hist.cc | 204 ++++++++++-------------------- src/tree/updater_quantile_hist.h | 3 + 2 files changed, 67 insertions(+), 140 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 920fdc52a17d..fd25511c32c0 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -57,7 +57,6 @@ void QuantileHistMaker::Update(HostDeviceVector *gpair, if (dmat != p_last_dmat_ || is_gmat_initialized_ == false) { gmat_.Init(dmat, static_cast(param_.max_bin)); column_matrix_.Init(gmat_, param_.sparse_threshold); - if (param_.enable_feature_grouping > 0) { gmatb_.Init(gmat_, column_matrix_, param_); } @@ -80,6 +79,7 @@ void QuantileHistMaker::Update(HostDeviceVector *gpair, for (auto tree : trees) { builder_->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, tree); } + param_.learning_rate = lr; p_last_dmat_ = dmat; @@ -114,42 +114,28 @@ void QuantileHistMaker::Builder::SyncHistograms( // Merging histograms from each thread into once hist_buffer_.ReduceHist(node, r.begin(), r.end()); + // Store posible parent node + auto this_local = phist_local_[entry.nid]; + CopyHist(this_local, this_hist, r.begin(), r.end()); + if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1 && !isDistributed) { auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; auto sibling_hist = hist_[entry.sibling_nid]; -//std::cout << "\n---------------------------SubtractionHist---------------------------\n"; SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); - //SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); - } - }); - - if (isDistributed) { - - - this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count); - - common::BlockedSpace2d space2(nodes_for_subtraction_trick_.size(), [&](size_t node) { - return nbins; - }, 1024); - - common::ParallelFor2d(space2, this->nthread_, [&](size_t node, common::Range1d r) { - const auto entry = nodes_for_subtraction_trick_[node]; - auto this_hist = hist_[entry.nid]; - - if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { - auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; + } else if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { auto sibling_hist = hist_[entry.sibling_nid]; - - SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); + auto parent_hist = phist_local_[(*p_tree)[entry.nid].Parent()]; + SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); + // Store posible parent node + auto sibling_local = phist_local_[entry.sibling_nid]; + CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); } }); - - - // use Subtraction Trick -/* for (auto const& node : nodes_for_subtraction_trick_) { - SubtractionTrick(hist_[node.nid], hist_[node.sibling_nid], - hist_[(*p_tree)[node.nid].Parent()]); - }*/ + if (isDistributed) { + builder_monitor_.Start("SyncHistograms:histred_.Allreduce"); + int m = nodes_for_explicit_hist_build_.size() + nodes_for_subtraction_trick_.size(); + this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * m); + builder_monitor_.Stop("SyncHistograms:histred_.Allreduce"); } builder_monitor_.Stop("SyncHistograms"); @@ -181,18 +167,29 @@ void QuantileHistMaker::Builder::BuildHistogramsLossGuide( void QuantileHistMaker::Builder::AddHistRows(int *starting_index, int *sync_count) { builder_monitor_.Start("AddHistRows"); - - for (auto const& entry : nodes_for_explicit_hist_build_) { - int nid = entry.nid; + int rank = rabit::GetRank(); + std::vector merged_hist(nodes_for_explicit_hist_build_.size() + + nodes_for_subtraction_trick_.size()); + for (size_t i = 0; i < nodes_for_explicit_hist_build_.size(); ++i) { + merged_hist[i] = nodes_for_explicit_hist_build_[i].nid; + } + for (size_t i = 0; i < nodes_for_subtraction_trick_.size(); ++i) { + merged_hist[nodes_for_explicit_hist_build_.size() + i] = + nodes_for_subtraction_trick_[i].nid; + } + std::sort(merged_hist.begin(), merged_hist.end()); + for (auto const& nid : merged_hist) { hist_.AddHistRow(nid); - (*starting_index) = std::min(nid, (*starting_index)); } - (*sync_count) = nodes_for_explicit_hist_build_.size(); - - for (auto const& node : nodes_for_subtraction_trick_) { - hist_.AddHistRow(node.nid); + if (rabit::IsDistributed()) { + for (auto const& nid : merged_hist) { + phist_local_.AddHistRow(nid); + } } + (*starting_index) = merged_hist[0]; + (*sync_count) = merged_hist.size(); + builder_monitor_.Stop("AddHistRows"); } @@ -205,6 +202,7 @@ void QuantileHistMaker::Builder::BuildLocalHistograms( builder_monitor_.Start("BuildLocalHistograms"); const size_t n_nodes = nodes_for_explicit_hist_build_.size(); + // create space of size (# rows in each node) common::BlockedSpace2d space(n_nodes, [&](size_t node) { const int32_t nid = nodes_for_explicit_hist_build_[node].nid; @@ -326,98 +324,26 @@ void QuantileHistMaker::Builder::SplitSiblings(const std::vector& n std::vector* small_siblings, std::vector* big_siblings, RegTree *p_tree) { - builder_monitor_.Start("SplitSiblings"); - - const size_t size = nodes.size(); - //std::cout << "-------------------------size: " << size << "-------------------------\n"; - std::vector node_sizes1(size * 2); - //size_t* node_sizes1 = new size_t[2]; - //size_t node_sizes1[/*size * */256*2]; - size_t ii = 0; - for (size_t i = 0; i < nodes.size(); i++) { - int nid = nodes[i].nid; - RegTree::Node &node = (*p_tree)[nid]; -if(!node.IsRoot()) -{ - const int32_t left_id = (*p_tree)[node.Parent()].LeftChild(); - const int32_t right_id = (*p_tree)[node.Parent()].RightChild(); - node_sizes1[ii] = row_set_collection_[left_id ].Size(); - node_sizes1[ii + 1] = row_set_collection_[right_id].Size(); - //node_sizes[ii] = row_set_collection_[left_id ].Size(); - //node_sizes[ii + 1] = row_set_collection_[right_id ].Size(); - ii += 2; -} - } - if(rabit::IsDistributed()) - { - rabit::Allreduce(&node_sizes1[0], size * 2); - } -//CHECK_EQ(size*2,i); - for (auto const& entry : nodes) { int nid = entry.nid; RegTree::Node &node = (*p_tree)[nid]; - - - -// size_t node_sizes[2] = { row_set_collection_[left_id ].Size(), -// row_set_collection_[right_id].Size() }; -// -// if (rabit::IsDistributed()) { -// // compute amount of samples in each dtree node accross all distributed workers -// rabit::Allreduce(&node_sizes[0], 2); -// } - - -if(node.IsRoot()) -{ - small_siblings->push_back(entry); -} -else -{ - const int32_t left_id = (*p_tree)[node.Parent()].LeftChild(); - const int32_t right_id = (*p_tree)[node.Parent()].RightChild(); - size_t node_sizes[2] = { row_set_collection_[left_id ].Size(), - row_set_collection_[right_id].Size() }; - if (rabit::IsDistributed()) { - // compute amount of samples in each dtree node accross all distributed workers - rabit::Allreduce(&node_sizes[0], 2); - } - if(nid == left_id && node_sizes[0] < node_sizes[1]) - { - small_siblings->push_back(entry); - } - else if(nid == right_id && node_sizes[1] <= node_sizes[0]) - { - small_siblings->push_back(entry); - } - else - { - big_siblings->push_back(entry); - } -} -/* if (rabit::IsDistributed()) { - if (node.IsRoot() || node.IsLeftChild()) { - small_siblings->push_back(entry); - } else { - big_siblings->push_back(entry); - } + if (node.IsRoot()) { + small_siblings->push_back(entry); } else { - if (!node.IsRoot() && node.IsLeftChild() && - (row_set_collection_[nid].Size() < - row_set_collection_[(*p_tree)[node.Parent()].RightChild()].Size())) { - small_siblings->push_back(entry); - } else if (!node.IsRoot() && !node.IsLeftChild() && - (row_set_collection_[nid].Size() <= - row_set_collection_[(*p_tree)[node.Parent()].LeftChild()].Size())) { + const int32_t left_id = (*p_tree)[node.Parent()].LeftChild(); + const int32_t right_id = (*p_tree)[node.Parent()].RightChild(); + size_t node_sizes[2] = { row_set_collection_[left_id ].Size(), + row_set_collection_[right_id].Size() }; + + if (nid == left_id && node_sizes[0] < node_sizes[1]) { small_siblings->push_back(entry); - } else if (node.IsRoot()) { + } else if (nid == right_id && node_sizes[1] <= node_sizes[0]) { small_siblings->push_back(entry); } else { big_siblings->push_back(entry); } - }*/ + } } builder_monitor_.Stop("SplitSiblings"); } @@ -440,17 +366,18 @@ void QuantileHistMaker::Builder::ExpandWithDepthWise( int starting_index = std::numeric_limits::max(); int sync_count = 0; std::vector temp_qexpand_depth; - +int rank = rabit::GetRank(); SplitSiblings(qexpand_depth_wise_, &nodes_for_explicit_hist_build_, &nodes_for_subtraction_trick_, p_tree); AddHistRows(&starting_index, &sync_count); BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); SyncHistograms(starting_index, sync_count, p_tree); - BuildNodeStats(gmat, p_fmat, p_tree, gpair_h); + EvaluateAndApplySplits(gmat, column_matrix, p_tree, &num_leaves, depth, ×tamp, &temp_qexpand_depth); + // clean up qexpand_depth_wise_.clear(); nodes_for_subtraction_trick_.clear(); @@ -471,7 +398,7 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide( DMatrix* p_fmat, RegTree* p_tree, const std::vector& gpair_h) { - + builder_monitor_.Start("ExpandWithLossGuide"); unsigned timestamp = 0; int num_leaves = 0; @@ -516,15 +443,9 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide( size_t node_sizes[2] = { row_set_collection_[cleft].Size(), row_set_collection_[cright].Size() }; - if (rabit::IsDistributed()) { - rabit::Allreduce(&node_sizes[0], 2); - } - if(node_sizes[0] < node_sizes[1]) - { + if (node_sizes[0] < node_sizes[1]) { BuildHistogramsLossGuide(left_node, gmat, gmatb, p_tree, gpair_h); - } - else - { + } else { BuildHistogramsLossGuide(right_node, gmat, gmatb, p_tree, gpair_h); } @@ -545,6 +466,7 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide( ++num_leaves; // give two and take one, as parent is no longer a leaf } } + builder_monitor_.Stop("ExpandWithLossGuide"); } void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat, @@ -561,21 +483,29 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat, interaction_constraints_.Reset(); this->InitData(gmat, gpair_h, *p_fmat, *p_tree); - + builder_monitor_.Start("UpdateInternal_1"); if (param_.grow_policy == TrainParam::kLossGuide) { ExpandWithLossGuide(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h); } else { ExpandWithDepthWise(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h); } + builder_monitor_.Stop("UpdateInternal_1"); + builder_monitor_.Start("UpdateInternal_2"); for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg; p_tree->Stat(nid).base_weight = snode_[nid].weight; p_tree->Stat(nid).sum_hess = static_cast(snode_[nid].stats.sum_hess); } + builder_monitor_.Stop("UpdateInternal_2"); + int rank = rabit::GetRank(); + if (rank == 3) { + builder_monitor_.Start("UpdateInternal_3");} pruner_->Update(gpair, p_fmat, std::vector{p_tree}); - + if (rank == 3) { + builder_monitor_.Stop("UpdateInternal_3"); +} builder_monitor_.Stop("Update"); } @@ -708,6 +638,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat, // initialize histogram collection uint32_t nbins = gmat.cut.Ptrs().back(); hist_.Init(nbins); + phist_local_.Init(nbins); hist_buffer_.Init(nbins); // initialize histogram builder @@ -1119,18 +1050,15 @@ void QuantileHistMaker::Builder::ApplySplit(const std::vector nodes const HistCollection& hist, RegTree* p_tree) { builder_monitor_.Start("ApplySplit"); - // 1. Find split condition for each split const size_t n_nodes = nodes.size(); std::vector split_conditions; FindSplitConditions(nodes, *p_tree, gmat, &split_conditions); - // 2.1 Create a blocked space of size SUM(samples in each node) common::BlockedSpace2d space(n_nodes, [&](size_t node_in_set) { int32_t nid = nodes[node_in_set].nid; return row_set_collection_[nid].Size(); }, kPartitionBlockSize); - // 2.2 Initialize the partition builder // allocate buffers for storage intermediate results by each thread partition_builder_.Init(space.Size(), n_nodes, [&](size_t node_in_set) { @@ -1139,7 +1067,6 @@ void QuantileHistMaker::Builder::ApplySplit(const std::vector nodes const size_t n_tasks = size / kPartitionBlockSize + !!(size % kPartitionBlockSize); return n_tasks; }); - // 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node // Store results in intermediate buffers from partition_builder_ common::ParallelFor2d(space, this->nthread_, [&](size_t node_in_set, common::Range1d r) { @@ -1161,7 +1088,6 @@ void QuantileHistMaker::Builder::ApplySplit(const std::vector nodes CHECK(false); // no default behavior } }); - // 3. Compute offsets to copy blocks of row-indexes // from partition_builder_ to row_set_collection_ partition_builder_.CalculateRowOffsets(); @@ -1173,10 +1099,8 @@ void QuantileHistMaker::Builder::ApplySplit(const std::vector nodes partition_builder_.MergeToArray(node_in_set, r.begin(), const_cast(row_set_collection_[nid].begin)); }); - // 5. Add info about splits into row_set_collection_ AddSplitsToRowSet(nodes, p_tree); - builder_monitor_.Stop("ApplySplit"); } diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index 0c7acbb6d539..eaa5767e94d0 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -331,6 +331,9 @@ class QuantileHistMaker: public TreeUpdater { std::vector snode_; /*! \brief culmulative histogram of gradients. */ HistCollection hist_; + /*! \brief culmulative local parent histogram of gradients. */ + HistCollection phist_local_; + /*! \brief feature with least # of bins. to be used for dense specialization of InitNewNode() */ uint32_t fid_least_bins_; From c37e4cf2330d1483d5a3929f80f5a56528206c66 Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Thu, 23 Apr 2020 19:04:14 +0300 Subject: [PATCH 07/13] cleanup --- src/tree/updater_quantile_hist.cc | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index fd25511c32c0..a55b4377e86f 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -167,7 +167,6 @@ void QuantileHistMaker::Builder::BuildHistogramsLossGuide( void QuantileHistMaker::Builder::AddHistRows(int *starting_index, int *sync_count) { builder_monitor_.Start("AddHistRows"); - int rank = rabit::GetRank(); std::vector merged_hist(nodes_for_explicit_hist_build_.size() + nodes_for_subtraction_trick_.size()); for (size_t i = 0; i < nodes_for_explicit_hist_build_.size(); ++i) { @@ -366,7 +365,6 @@ void QuantileHistMaker::Builder::ExpandWithDepthWise( int starting_index = std::numeric_limits::max(); int sync_count = 0; std::vector temp_qexpand_depth; -int rank = rabit::GetRank(); SplitSiblings(qexpand_depth_wise_, &nodes_for_explicit_hist_build_, &nodes_for_subtraction_trick_, p_tree); AddHistRows(&starting_index, &sync_count); @@ -483,29 +481,19 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat, interaction_constraints_.Reset(); this->InitData(gmat, gpair_h, *p_fmat, *p_tree); - builder_monitor_.Start("UpdateInternal_1"); if (param_.grow_policy == TrainParam::kLossGuide) { ExpandWithLossGuide(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h); } else { ExpandWithDepthWise(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h); } - builder_monitor_.Stop("UpdateInternal_1"); - builder_monitor_.Start("UpdateInternal_2"); for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg; p_tree->Stat(nid).base_weight = snode_[nid].weight; p_tree->Stat(nid).sum_hess = static_cast(snode_[nid].stats.sum_hess); } - builder_monitor_.Stop("UpdateInternal_2"); - - int rank = rabit::GetRank(); - if (rank == 3) { - builder_monitor_.Start("UpdateInternal_3");} pruner_->Update(gpair, p_fmat, std::vector{p_tree}); - if (rank == 3) { - builder_monitor_.Stop("UpdateInternal_3"); -} + builder_monitor_.Stop("Update"); } From ecb5f10d30c1468d99a3009dd814e4f9103d7b0f Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Fri, 24 Apr 2020 11:12:34 +0300 Subject: [PATCH 08/13] cleanup --- src/tree/updater_quantile_hist.cc | 137 +++++++++++++++++++++--------- src/tree/updater_quantile_hist.h | 2 +- 2 files changed, 98 insertions(+), 41 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index a55b4377e86f..fe87601093dd 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -102,40 +102,82 @@ void QuantileHistMaker::Builder::SyncHistograms( builder_monitor_.Start("SyncHistograms"); const bool isDistributed = rabit::IsDistributed(); - const size_t nbins = hist_builder_.GetNumBins(); - common::BlockedSpace2d space(nodes_for_explicit_hist_build_.size(), [&](size_t node) { - return nbins; - }, 1024); + if (!isDistributed) { + common::BlockedSpace2d space(nodes_for_explicit_hist_build_.size(), [&](size_t node) { + return nbins; + }, 1024); + + common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = nodes_for_explicit_hist_build_[node]; + auto this_hist = hist_[entry.nid]; + // Merging histograms from each thread into once + hist_buffer_.ReduceHist(node, r.begin(), r.end()); + + if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1 /*&& !isDistributed*/) { + auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; + auto sibling_hist = hist_[entry.sibling_nid]; + SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); + } + }); + } else { + common::BlockedSpace2d space(nodes_for_explicit_hist_build_.size(), [&](size_t node) { + return nbins; + }, 1024); + + common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = nodes_for_explicit_hist_build_[node]; + auto this_hist = hist_[entry.nid]; + // Merging histograms from each thread into once + hist_buffer_.ReduceHist(node, r.begin(), r.end()); - common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { - const auto entry = nodes_for_explicit_hist_build_[node]; - auto this_hist = hist_[entry.nid]; - // Merging histograms from each thread into once - hist_buffer_.ReduceHist(node, r.begin(), r.end()); - - // Store posible parent node - auto this_local = phist_local_[entry.nid]; - CopyHist(this_local, this_hist, r.begin(), r.end()); - - if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1 && !isDistributed) { - auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; - auto sibling_hist = hist_[entry.sibling_nid]; - SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); - } else if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { - auto sibling_hist = hist_[entry.sibling_nid]; - auto parent_hist = phist_local_[(*p_tree)[entry.nid].Parent()]; - SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); // Store posible parent node - auto sibling_local = phist_local_[entry.sibling_nid]; - CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); - } - }); - if (isDistributed) { + auto this_local = phist_local_[entry.nid]; + CopyHist(this_local, this_hist, r.begin(), r.end()); + + if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + auto sibling_hist = hist_[entry.sibling_nid]; + auto parent_hist = phist_local_[(*p_tree)[entry.nid].Parent()]; + SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); + // Store posible parent node + auto sibling_local = phist_local_[entry.sibling_nid]; + CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); + } + }); + builder_monitor_.Start("SyncHistograms:histred_.Allreduce"); - int m = nodes_for_explicit_hist_build_.size() + nodes_for_subtraction_trick_.size(); - this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * m); + this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count); builder_monitor_.Stop("SyncHistograms:histred_.Allreduce"); + + common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = nodes_for_explicit_hist_build_[node]; + if (!((*p_tree)[entry.nid].IsLeftChild())) { + auto this_hist = hist_[entry.nid]; + + if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; + auto sibling_hist = hist_[entry.sibling_nid]; + SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); + } + } + }); + + common::BlockedSpace2d space2(nodes_for_subtraction_trick_.size(), [&](size_t node) { + return nbins; + }, 1024); + + common::ParallelFor2d(space2, this->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = nodes_for_subtraction_trick_[node]; + if (!((*p_tree)[entry.nid].IsLeftChild())) { + auto this_hist = hist_[entry.nid]; + + if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; + auto sibling_hist = hist_[entry.sibling_nid]; + SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); + } + } + }); } builder_monitor_.Stop("SyncHistograms"); @@ -159,15 +201,17 @@ void QuantileHistMaker::Builder::BuildHistogramsLossGuide( int starting_index = std::numeric_limits::max(); int sync_count = 0; - AddHistRows(&starting_index, &sync_count); + AddHistRows(&starting_index, &sync_count, p_tree); BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); SyncHistograms(starting_index, sync_count, p_tree); } -void QuantileHistMaker::Builder::AddHistRows(int *starting_index, int *sync_count) { +void QuantileHistMaker::Builder::AddHistRows(int *starting_index, int *sync_count, + RegTree *p_tree) { builder_monitor_.Start("AddHistRows"); - std::vector merged_hist(nodes_for_explicit_hist_build_.size() + + + std::vector merged_hist(nodes_for_explicit_hist_build_.size() + nodes_for_subtraction_trick_.size()); for (size_t i = 0; i < nodes_for_explicit_hist_build_.size(); ++i) { merged_hist[i] = nodes_for_explicit_hist_build_[i].nid; @@ -177,18 +221,31 @@ void QuantileHistMaker::Builder::AddHistRows(int *starting_index, int *sync_coun nodes_for_subtraction_trick_[i].nid; } std::sort(merged_hist.begin(), merged_hist.end()); + int n_left = 0; for (auto const& nid : merged_hist) { - hist_.AddHistRow(nid); + if ((*p_tree)[nid].IsLeftChild()) { + hist_.AddHistRow(nid); + (*starting_index) = std::min(nid, (*starting_index)); + n_left++; + if (rabit::IsDistributed()) { + phist_local_.AddHistRow(nid); + } + } } - if (rabit::IsDistributed()) { - for (auto const& nid : merged_hist) { - phist_local_.AddHistRow(nid); + for (auto const& nid : merged_hist) { + if (!((*p_tree)[nid].IsLeftChild())) { + hist_.AddHistRow(nid); + if (rabit::IsDistributed()) { + phist_local_.AddHistRow(nid); + } } } - (*starting_index) = merged_hist[0]; - (*sync_count) = merged_hist.size(); - + (*sync_count) = merged_hist.size() / 2; + if (*sync_count == 0) { + (*sync_count) = 1; + } + CHECK_EQ(n_left, (*sync_count)); builder_monitor_.Stop("AddHistRows"); } @@ -367,7 +424,7 @@ void QuantileHistMaker::Builder::ExpandWithDepthWise( std::vector temp_qexpand_depth; SplitSiblings(qexpand_depth_wise_, &nodes_for_explicit_hist_build_, &nodes_for_subtraction_trick_, p_tree); - AddHistRows(&starting_index, &sync_count); + AddHistRows(&starting_index, &sync_count, p_tree); BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); SyncHistograms(starting_index, sync_count, p_tree); diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index eaa5767e94d0..1e4016e0bef8 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -259,7 +259,7 @@ class QuantileHistMaker: public TreeUpdater { RegTree *p_tree, const std::vector &gpair_h); - void AddHistRows(int *starting_index, int *sync_count); + void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree); void BuildHistogramsLossGuide( ExpandEntry entry, From a54153caf1758aa9c664c697a313f5f0c29c5356 Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Wed, 29 Apr 2020 11:15:02 +0300 Subject: [PATCH 09/13] fix comments, and reduced memory for all reduce --- src/tree/updater_quantile_hist.cc | 20 +++++++++++--------- src/tree/updater_quantile_hist.h | 6 ++++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index fe87601093dd..3f08d3f14ea1 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -55,11 +55,13 @@ void QuantileHistMaker::Update(HostDeviceVector *gpair, DMatrix *dmat, const std::vector &trees) { if (dmat != p_last_dmat_ || is_gmat_initialized_ == false) { + updater_monitor_.Start("GmatInitialization"); gmat_.Init(dmat, static_cast(param_.max_bin)); column_matrix_.Init(gmat_, param_.sparse_threshold); if (param_.enable_feature_grouping > 0) { gmatb_.Init(gmat_, column_matrix_, param_); } + updater_monitor_.Stop("GmatInitialization"); // A proper solution is puting cut matrix in DMatrix, see: // https://github.com/dmlc/xgboost/issues/5143 is_gmat_initialized_ = true; @@ -145,9 +147,9 @@ void QuantileHistMaker::Builder::SyncHistograms( } }); - builder_monitor_.Start("SyncHistograms:histred_.Allreduce"); + builder_monitor_.Start("SyncHistogramsAllreduce"); this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count); - builder_monitor_.Stop("SyncHistograms:histred_.Allreduce"); + builder_monitor_.Stop("SyncHistogramsAllreduce"); common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { const auto entry = nodes_for_explicit_hist_build_[node]; @@ -389,12 +391,12 @@ void QuantileHistMaker::Builder::SplitSiblings(const std::vector& n } else { const int32_t left_id = (*p_tree)[node.Parent()].LeftChild(); const int32_t right_id = (*p_tree)[node.Parent()].RightChild(); - size_t node_sizes[2] = { row_set_collection_[left_id ].Size(), - row_set_collection_[right_id].Size() }; - if (nid == left_id && node_sizes[0] < node_sizes[1]) { + if (nid == left_id && row_set_collection_[left_id ].Size() < + row_set_collection_[right_id].Size()) { small_siblings->push_back(entry); - } else if (nid == right_id && node_sizes[1] <= node_sizes[0]) { + } else if (nid == right_id && row_set_collection_[right_id].Size() <= + row_set_collection_[left_id ].Size()) { small_siblings->push_back(entry); } else { big_siblings->push_back(entry); @@ -496,9 +498,7 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide( ExpandEntry right_node(cright, cleft, p_tree->GetDepth(cright), 0.0f, timestamp++); - size_t node_sizes[2] = { row_set_collection_[cleft].Size(), - row_set_collection_[cright].Size() }; - if (node_sizes[0] < node_sizes[1]) { + if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) { BuildHistogramsLossGuide(left_node, gmat, gmatb, p_tree, gpair_h); } else { BuildHistogramsLossGuide(right_node, gmat, gmatb, p_tree, gpair_h); @@ -549,7 +549,9 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat, p_tree->Stat(nid).base_weight = snode_[nid].weight; p_tree->Stat(nid).sum_hess = static_cast(snode_[nid].stats.sum_hess); } + builder_monitor_.Start("PrunerUpdate"); pruner_->Update(gpair, p_fmat, std::vector{p_tree}); + builder_monitor_.Stop("PrunerUpdate"); builder_monitor_.Stop("Update"); } diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index 1e4016e0bef8..0840870dd98a 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -81,7 +81,9 @@ using xgboost::common::Column; /*! \brief construct a tree using quantized feature values */ class QuantileHistMaker: public TreeUpdater { public: - QuantileHistMaker() = default; + QuantileHistMaker() { + updater_monitor_.Init("Quantile"); + } void Configure(const Args& args) override; void Update(HostDeviceVector* gpair, @@ -371,7 +373,7 @@ class QuantileHistMaker: public TreeUpdater { common::ParallelGHistBuilder hist_buffer_; rabit::Reducer histred_; }; - + common::Monitor updater_monitor_; std::unique_ptr builder_; std::unique_ptr pruner_; std::unique_ptr spliteval_; From b6726b5843593d596f9ea1ccc77a72ff31cad8bf Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Wed, 6 May 2020 15:27:07 +0300 Subject: [PATCH 10/13] comments were applied --- src/tree/updater_prune.cc | 6 +- src/tree/updater_quantile_hist.cc | 115 ++++++++++++------------------ src/tree/updater_quantile_hist.h | 7 +- 3 files changed, 57 insertions(+), 71 deletions(-) diff --git a/src/tree/updater_prune.cc b/src/tree/updater_prune.cc index 5a31a90dd36d..76a8916a0598 100644 --- a/src/tree/updater_prune.cc +++ b/src/tree/updater_prune.cc @@ -14,7 +14,7 @@ #include "xgboost/json.h" #include "./param.h" #include "../common/io.h" - +#include "../common/timer.h" namespace xgboost { namespace tree { @@ -25,6 +25,7 @@ class TreePruner: public TreeUpdater { public: TreePruner() { syncher_.reset(TreeUpdater::Create("sync", tparam_)); + pruner_monitor_.Init("TreePruner"); } char const* Name() const override { return "prune"; @@ -52,6 +53,7 @@ class TreePruner: public TreeUpdater { void Update(HostDeviceVector *gpair, DMatrix *p_fmat, const std::vector &trees) override { + pruner_monitor_.Start("PrunerUpdate"); // rescale learning rate according to size of trees float lr = param_.learning_rate; param_.learning_rate = lr / trees.size(); @@ -60,6 +62,7 @@ class TreePruner: public TreeUpdater { } param_.learning_rate = lr; syncher_->Update(gpair, p_fmat, trees); + pruner_monitor_.Stop("PrunerUpdate"); } private: @@ -105,6 +108,7 @@ class TreePruner: public TreeUpdater { std::unique_ptr syncher_; // training parameter TrainParam param_; + common::Monitor pruner_monitor_; }; XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune") diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 3f08d3f14ea1..83ffabee2a3d 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -97,6 +97,23 @@ bool QuantileHistMaker::UpdatePredictionCache( } } +void QuantileHistMaker::Builder::ParallelSubtractionHist(const common::BlockedSpace2d& space, + const std::vector& nodes, + const RegTree * p_tree) { + common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = nodes[node]; + if (!((*p_tree)[entry.nid].IsLeftChild())) { + auto this_hist = hist_[entry.nid]; + + if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; + auto sibling_hist = hist_[entry.sibling_nid]; + SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); + } + } + }); +} + void QuantileHistMaker::Builder::SyncHistograms( int starting_index, int sync_count, @@ -105,81 +122,44 @@ void QuantileHistMaker::Builder::SyncHistograms( const bool isDistributed = rabit::IsDistributed(); const size_t nbins = hist_builder_.GetNumBins(); - if (!isDistributed) { - common::BlockedSpace2d space(nodes_for_explicit_hist_build_.size(), [&](size_t node) { - return nbins; - }, 1024); - - common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { - const auto entry = nodes_for_explicit_hist_build_[node]; - auto this_hist = hist_[entry.nid]; - // Merging histograms from each thread into once - hist_buffer_.ReduceHist(node, r.begin(), r.end()); - - if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1 /*&& !isDistributed*/) { - auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; - auto sibling_hist = hist_[entry.sibling_nid]; - SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); - } - }); - } else { - common::BlockedSpace2d space(nodes_for_explicit_hist_build_.size(), [&](size_t node) { - return nbins; - }, 1024); - - common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { - const auto entry = nodes_for_explicit_hist_build_[node]; - auto this_hist = hist_[entry.nid]; - // Merging histograms from each thread into once - hist_buffer_.ReduceHist(node, r.begin(), r.end()); - + common::BlockedSpace2d space(nodes_for_explicit_hist_build_.size(), [&](size_t node) { + return nbins; + }, 1024); + common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = nodes_for_explicit_hist_build_[node]; + auto this_hist = hist_[entry.nid]; + // Merging histograms from each thread into once + hist_buffer_.ReduceHist(node, r.begin(), r.end()); + if (isDistributed) { // Store posible parent node - auto this_local = phist_local_[entry.nid]; + auto this_local = hist_local_worker_[entry.nid]; CopyHist(this_local, this_hist, r.begin(), r.end()); + } - if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { - auto sibling_hist = hist_[entry.sibling_nid]; - auto parent_hist = phist_local_[(*p_tree)[entry.nid].Parent()]; - SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); + if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + const size_t parent_id = (*p_tree)[entry.nid].Parent(); + auto parent_hist = isDistributed ? hist_local_worker_[parent_id] : hist_[parent_id]; + auto sibling_hist = hist_[entry.sibling_nid]; + SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); + if (isDistributed) { // Store posible parent node - auto sibling_local = phist_local_[entry.sibling_nid]; + auto sibling_local = hist_local_worker_[entry.sibling_nid]; CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); } - }); + } + }); + if (isDistributed) { builder_monitor_.Start("SyncHistogramsAllreduce"); this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count); builder_monitor_.Stop("SyncHistogramsAllreduce"); - common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { - const auto entry = nodes_for_explicit_hist_build_[node]; - if (!((*p_tree)[entry.nid].IsLeftChild())) { - auto this_hist = hist_[entry.nid]; - - if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { - auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; - auto sibling_hist = hist_[entry.sibling_nid]; - SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); - } - } - }); + ParallelSubtractionHist(space, nodes_for_explicit_hist_build_, p_tree); common::BlockedSpace2d space2(nodes_for_subtraction_trick_.size(), [&](size_t node) { return nbins; }, 1024); - - common::ParallelFor2d(space2, this->nthread_, [&](size_t node, common::Range1d r) { - const auto entry = nodes_for_subtraction_trick_[node]; - if (!((*p_tree)[entry.nid].IsLeftChild())) { - auto this_hist = hist_[entry.nid]; - - if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { - auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; - auto sibling_hist = hist_[entry.sibling_nid]; - SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); - } - } - }); + ParallelSubtractionHist(space2, nodes_for_subtraction_trick_, p_tree); } builder_monitor_.Stop("SyncHistograms"); @@ -230,7 +210,7 @@ void QuantileHistMaker::Builder::AddHistRows(int *starting_index, int *sync_coun (*starting_index) = std::min(nid, (*starting_index)); n_left++; if (rabit::IsDistributed()) { - phist_local_.AddHistRow(nid); + hist_local_worker_.AddHistRow(nid); } } } @@ -238,16 +218,17 @@ void QuantileHistMaker::Builder::AddHistRows(int *starting_index, int *sync_coun if (!((*p_tree)[nid].IsLeftChild())) { hist_.AddHistRow(nid); if (rabit::IsDistributed()) { - phist_local_.AddHistRow(nid); + hist_local_worker_.AddHistRow(nid); } } } - (*sync_count) = merged_hist.size() / 2; - if (*sync_count == 0) { + if (n_left == 0) { (*sync_count) = 1; + } else { + (*sync_count) = n_left; } - CHECK_EQ(n_left, (*sync_count)); + builder_monitor_.Stop("AddHistRows"); } @@ -549,9 +530,7 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat, p_tree->Stat(nid).base_weight = snode_[nid].weight; p_tree->Stat(nid).sum_hess = static_cast(snode_[nid].stats.sum_hess); } - builder_monitor_.Start("PrunerUpdate"); pruner_->Update(gpair, p_fmat, std::vector{p_tree}); - builder_monitor_.Stop("PrunerUpdate"); builder_monitor_.Stop("Update"); } @@ -685,7 +664,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat, // initialize histogram collection uint32_t nbins = gmat.cut.Ptrs().back(); hist_.Init(nbins); - phist_local_.Init(nbins); + hist_local_worker_.Init(nbins); hist_buffer_.Init(nbins); // initialize histogram builder diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index 0840870dd98a..854b1359b3ee 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -82,7 +82,7 @@ using xgboost::common::Column; class QuantileHistMaker: public TreeUpdater { public: QuantileHistMaker() { - updater_monitor_.Init("Quantile"); + updater_monitor_.Init("QuantileHistMaker"); } void Configure(const Args& args) override; @@ -281,6 +281,9 @@ class QuantileHistMaker: public TreeUpdater { void SyncHistograms(int starting_index, int sync_count, RegTree *p_tree); + void ParallelSubtractionHist(const common::BlockedSpace2d& space, + const std::vector& nodes, + const RegTree * p_tree); void BuildNodeStats(const GHistIndexMatrix &gmat, DMatrix *p_fmat, @@ -334,7 +337,7 @@ class QuantileHistMaker: public TreeUpdater { /*! \brief culmulative histogram of gradients. */ HistCollection hist_; /*! \brief culmulative local parent histogram of gradients. */ - HistCollection phist_local_; + HistCollection hist_local_worker_; /*! \brief feature with least # of bins. to be used for dense specialization of InitNewNode() */ From dbe9c1fee02b95d099ece2769b4cc48f3d794272 Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Fri, 8 May 2020 21:43:54 +0300 Subject: [PATCH 11/13] strategy pattern was applied --- src/tree/updater_quantile_hist.cc | 227 +++++++++++++++++------------- src/tree/updater_quantile_hist.h | 82 ++++++++++- 2 files changed, 206 insertions(+), 103 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 83ffabee2a3d..1b27e653fb09 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -77,6 +77,13 @@ void QuantileHistMaker::Update(HostDeviceVector *gpair, std::move(pruner_), std::unique_ptr(spliteval_->GetHostClone()), int_constraint_, dmat)); + if (rabit::IsDistributed()) { + builder_->SetHistSynchronizer(new DistributedHistSynchronizer(builder_.get())); + builder_->SetHistRowsAdder(new DistributedHistRowsAdder(builder_.get())); + } else { + builder_->SetHistSynchronizer(new BatchHistSynchronizer(builder_.get())); + builder_->SetHistRowsAdder(new BatchHistRowsAdder(builder_.get())); + } } for (auto tree : trees) { builder_->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, tree); @@ -97,72 +104,146 @@ bool QuantileHistMaker::UpdatePredictionCache( } } -void QuantileHistMaker::Builder::ParallelSubtractionHist(const common::BlockedSpace2d& space, - const std::vector& nodes, - const RegTree * p_tree) { - common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { - const auto entry = nodes[node]; - if (!((*p_tree)[entry.nid].IsLeftChild())) { - auto this_hist = hist_[entry.nid]; +void BatchHistSynchronizer::SyncHistograms(int starting_index, + int sync_count, + RegTree *p_tree) { + builder_->builder_monitor_.Start("SyncHistograms"); + const size_t nbins = builder_->hist_builder_.GetNumBins(); + common::BlockedSpace2d space(builder_->nodes_for_explicit_hist_build_.size(), [&](size_t node) { + return nbins; + }, 1024); - if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { - auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; - auto sibling_hist = hist_[entry.sibling_nid]; - SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); - } + common::ParallelFor2d(space, builder_->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = builder_->nodes_for_explicit_hist_build_[node]; + auto this_hist = builder_->hist_[entry.nid]; + // Merging histograms from each thread into once + builder_->hist_buffer_.ReduceHist(node, r.begin(), r.end()); + + if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + const size_t parent_id = (*p_tree)[entry.nid].Parent(); + auto parent_hist = builder_->hist_[parent_id]; + auto sibling_hist = builder_->hist_[entry.sibling_nid]; + SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); } }); + builder_->builder_monitor_.Stop("SyncHistograms"); } -void QuantileHistMaker::Builder::SyncHistograms( - int starting_index, - int sync_count, - RegTree *p_tree) { - builder_monitor_.Start("SyncHistograms"); - - const bool isDistributed = rabit::IsDistributed(); - const size_t nbins = hist_builder_.GetNumBins(); - common::BlockedSpace2d space(nodes_for_explicit_hist_build_.size(), [&](size_t node) { +void DistributedHistSynchronizer::SyncHistograms(int starting_index, + int sync_count, + RegTree *p_tree) { + builder_->builder_monitor_.Start("SyncHistograms"); + const size_t nbins = builder_->hist_builder_.GetNumBins(); + common::BlockedSpace2d space(builder_->nodes_for_explicit_hist_build_.size(), [&](size_t node) { return nbins; }, 1024); - common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) { - const auto entry = nodes_for_explicit_hist_build_[node]; - auto this_hist = hist_[entry.nid]; + common::ParallelFor2d(space, builder_->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = builder_->nodes_for_explicit_hist_build_[node]; + auto this_hist = builder_->hist_[entry.nid]; // Merging histograms from each thread into once - hist_buffer_.ReduceHist(node, r.begin(), r.end()); - if (isDistributed) { - // Store posible parent node - auto this_local = hist_local_worker_[entry.nid]; - CopyHist(this_local, this_hist, r.begin(), r.end()); - } + builder_->hist_buffer_.ReduceHist(node, r.begin(), r.end()); + // Store posible parent node + auto this_local = builder_->hist_local_worker_[entry.nid]; + CopyHist(this_local, this_hist, r.begin(), r.end()); if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { const size_t parent_id = (*p_tree)[entry.nid].Parent(); - auto parent_hist = isDistributed ? hist_local_worker_[parent_id] : hist_[parent_id]; - auto sibling_hist = hist_[entry.sibling_nid]; + auto parent_hist = builder_->hist_local_worker_[parent_id]; + auto sibling_hist = builder_->hist_[entry.sibling_nid]; SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); - if (isDistributed) { - // Store posible parent node - auto sibling_local = hist_local_worker_[entry.sibling_nid]; - CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); + // Store posible parent node + auto sibling_local = builder_->hist_local_worker_[entry.sibling_nid]; + CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); + } + }); + builder_->builder_monitor_.Start("SyncHistogramsAllreduce"); + this->builder_->histred_.Allreduce(builder_->hist_[starting_index].data(), + builder_->hist_builder_.GetNumBins() * sync_count); + builder_->builder_monitor_.Stop("SyncHistogramsAllreduce"); + + ParallelSubtractionHist(space, builder_->nodes_for_explicit_hist_build_, p_tree); + + common::BlockedSpace2d space2(builder_->nodes_for_subtraction_trick_.size(), [&](size_t node) { + return nbins; + }, 1024); + ParallelSubtractionHist(space2, builder_->nodes_for_subtraction_trick_, p_tree); + builder_->builder_monitor_.Stop("SyncHistograms"); +} + +void DistributedHistSynchronizer::ParallelSubtractionHist(const common::BlockedSpace2d& space, + const std::vector& nodes, + const RegTree * p_tree) { + common::ParallelFor2d(space, builder_->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = nodes[node]; + if (!((*p_tree)[entry.nid].IsLeftChild())) { + auto this_hist = builder_->hist_[entry.nid]; + + if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + auto parent_hist = builder_->hist_[(*p_tree)[entry.nid].Parent()]; + auto sibling_hist = builder_->hist_[entry.sibling_nid]; + SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); } } }); +} + +void BatchHistRowsAdder::AddHistRows(int *starting_index, int *sync_count, + RegTree *p_tree) { + builder_->builder_monitor_.Start("AddHistRows"); + + for (auto const& entry : builder_->nodes_for_explicit_hist_build_) { + int nid = entry.nid; + builder_->hist_.AddHistRow(nid); + (*starting_index) = std::min(nid, (*starting_index)); + } + (*sync_count) = builder_->nodes_for_explicit_hist_build_.size(); - if (isDistributed) { - builder_monitor_.Start("SyncHistogramsAllreduce"); - this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count); - builder_monitor_.Stop("SyncHistogramsAllreduce"); + for (auto const& node : builder_->nodes_for_subtraction_trick_) { + builder_->hist_.AddHistRow(node.nid); + } - ParallelSubtractionHist(space, nodes_for_explicit_hist_build_, p_tree); + builder_->builder_monitor_.Stop("AddHistRows"); +} - common::BlockedSpace2d space2(nodes_for_subtraction_trick_.size(), [&](size_t node) { - return nbins; - }, 1024); - ParallelSubtractionHist(space2, nodes_for_subtraction_trick_, p_tree); +void DistributedHistRowsAdder::AddHistRows(int *starting_index, int *sync_count, + RegTree *p_tree) { + builder_->builder_monitor_.Start("AddHistRows"); + const size_t explicit_size = builder_->nodes_for_explicit_hist_build_.size(); + const size_t subtaction_size = builder_->nodes_for_subtraction_trick_.size(); + std::vector merged_node_ids(explicit_size + subtaction_size); + for (size_t i = 0; i < explicit_size; ++i) { + merged_node_ids[i] = builder_->nodes_for_explicit_hist_build_[i].nid; + } + for (size_t i = 0; i < subtaction_size; ++i) { + merged_node_ids[explicit_size + i] = + builder_->nodes_for_subtraction_trick_[i].nid; + } + std::sort(merged_node_ids.begin(), merged_node_ids.end()); + int n_left = 0; + for (auto const& nid : merged_node_ids) { + if ((*p_tree)[nid].IsLeftChild()) { + builder_->hist_.AddHistRow(nid); + (*starting_index) = std::min(nid, (*starting_index)); + n_left++; + builder_->hist_local_worker_.AddHistRow(nid); + } } + for (auto const& nid : merged_node_ids) { + if (!((*p_tree)[nid].IsLeftChild())) { + builder_->hist_.AddHistRow(nid); + builder_->hist_local_worker_.AddHistRow(nid); + } + } + (*sync_count) = std::min(1, n_left); + builder_->builder_monitor_.Stop("AddHistRows"); +} + +void QuantileHistMaker::Builder::SetHistSynchronizer(HistSynchronizer* sync) { + hist_synchronizer_.reset(sync); +} - builder_monitor_.Stop("SyncHistograms"); +void QuantileHistMaker::Builder::SetHistRowsAdder(HistRowsAdder* adder) { + hist_rows_adder_.reset(adder); } void QuantileHistMaker::Builder::BuildHistogramsLossGuide( @@ -183,56 +264,11 @@ void QuantileHistMaker::Builder::BuildHistogramsLossGuide( int starting_index = std::numeric_limits::max(); int sync_count = 0; - AddHistRows(&starting_index, &sync_count, p_tree); + hist_rows_adder_->AddHistRows(&starting_index, &sync_count, p_tree); BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); - SyncHistograms(starting_index, sync_count, p_tree); -} - - -void QuantileHistMaker::Builder::AddHistRows(int *starting_index, int *sync_count, - RegTree *p_tree) { - builder_monitor_.Start("AddHistRows"); - - std::vector merged_hist(nodes_for_explicit_hist_build_.size() + - nodes_for_subtraction_trick_.size()); - for (size_t i = 0; i < nodes_for_explicit_hist_build_.size(); ++i) { - merged_hist[i] = nodes_for_explicit_hist_build_[i].nid; - } - for (size_t i = 0; i < nodes_for_subtraction_trick_.size(); ++i) { - merged_hist[nodes_for_explicit_hist_build_.size() + i] = - nodes_for_subtraction_trick_[i].nid; - } - std::sort(merged_hist.begin(), merged_hist.end()); - int n_left = 0; - for (auto const& nid : merged_hist) { - if ((*p_tree)[nid].IsLeftChild()) { - hist_.AddHistRow(nid); - (*starting_index) = std::min(nid, (*starting_index)); - n_left++; - if (rabit::IsDistributed()) { - hist_local_worker_.AddHistRow(nid); - } - } - } - for (auto const& nid : merged_hist) { - if (!((*p_tree)[nid].IsLeftChild())) { - hist_.AddHistRow(nid); - if (rabit::IsDistributed()) { - hist_local_worker_.AddHistRow(nid); - } - } - } - - if (n_left == 0) { - (*sync_count) = 1; - } else { - (*sync_count) = n_left; - } - - builder_monitor_.Stop("AddHistRows"); + hist_synchronizer_->SyncHistograms(starting_index, sync_count, p_tree); } - void QuantileHistMaker::Builder::BuildLocalHistograms( const GHistIndexMatrix &gmat, const GHistIndexBlockMatrix &gmatb, @@ -407,10 +443,9 @@ void QuantileHistMaker::Builder::ExpandWithDepthWise( std::vector temp_qexpand_depth; SplitSiblings(qexpand_depth_wise_, &nodes_for_explicit_hist_build_, &nodes_for_subtraction_trick_, p_tree); - AddHistRows(&starting_index, &sync_count, p_tree); - + hist_rows_adder_->AddHistRows(&starting_index, &sync_count, p_tree); BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); - SyncHistograms(starting_index, sync_count, p_tree); + hist_synchronizer_->SyncHistograms(starting_index, sync_count, p_tree); BuildNodeStats(gmat, p_fmat, p_tree, gpair_h); EvaluateAndApplySplits(gmat, column_matrix, p_tree, &num_leaves, depth, ×tamp, diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index 854b1359b3ee..e4ae31e28fb9 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -78,6 +78,8 @@ using xgboost::common::GHistBuilder; using xgboost::common::ColumnMatrix; using xgboost::common::Column; +class HistSynchronizer; +class HistRowsAdder; /*! \brief construct a tree using quantized feature values */ class QuantileHistMaker: public TreeUpdater { public: @@ -107,6 +109,12 @@ class QuantileHistMaker: public TreeUpdater { } protected: + friend class HistSynchronizer; + friend class BatchHistSynchronizer; + friend class DistributedHistSynchronizer; + friend class HistRowsAdder; + friend class BatchHistRowsAdder; + friend class DistributedHistRowsAdder; // training parameter TrainParam param_; // quantized data matrix @@ -176,8 +184,16 @@ class QuantileHistMaker: public TreeUpdater { bool UpdatePredictionCache(const DMatrix* data, HostDeviceVector* p_out_preds); + void SetHistSynchronizer(HistSynchronizer* sync); + void SetHistRowsAdder(HistRowsAdder* adder); protected: + friend class HistSynchronizer; + friend class BatchHistSynchronizer; + friend class DistributedHistSynchronizer; + friend class HistRowsAdder; + friend class BatchHistRowsAdder; + friend class DistributedHistRowsAdder; /* tree growing policies */ struct ExpandEntry { static const int kRootNid = 0; @@ -261,8 +277,6 @@ class QuantileHistMaker: public TreeUpdater { RegTree *p_tree, const std::vector &gpair_h); - void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree); - void BuildHistogramsLossGuide( ExpandEntry entry, const GHistIndexMatrix &gmat, @@ -278,9 +292,6 @@ class QuantileHistMaker: public TreeUpdater { std::vector* big_siblings, RegTree *p_tree); - void SyncHistograms(int starting_index, - int sync_count, - RegTree *p_tree); void ParallelSubtractionHist(const common::BlockedSpace2d& space, const std::vector& nodes, const RegTree * p_tree); @@ -321,7 +332,6 @@ class QuantileHistMaker: public TreeUpdater { return lhs.loss_chg < rhs.loss_chg; // favor large loss_chg } } - // --data fields-- const TrainParam& param_; // number of omp thread used during training @@ -338,7 +348,6 @@ class QuantileHistMaker: public TreeUpdater { HistCollection hist_; /*! \brief culmulative local parent histogram of gradients. */ HistCollection hist_local_worker_; - /*! \brief feature with least # of bins. to be used for dense specialization of InitNewNode() */ uint32_t fid_least_bins_; @@ -375,6 +384,8 @@ class QuantileHistMaker: public TreeUpdater { common::Monitor builder_monitor_; common::ParallelGHistBuilder hist_buffer_; rabit::Reducer histred_; + std::unique_ptr hist_synchronizer_; + std::unique_ptr hist_rows_adder_; }; common::Monitor updater_monitor_; std::unique_ptr builder_; @@ -383,6 +394,63 @@ class QuantileHistMaker: public TreeUpdater { FeatureInteractionConstraintHost int_constraint_; }; +class HistSynchronizer { + public: + explicit HistSynchronizer(QuantileHistMaker::Builder* builder) : builder_(builder) {} + virtual void SyncHistograms(int starting_index, + int sync_count, + RegTree *p_tree) = 0; + + protected: + QuantileHistMaker::Builder* builder_; +}; + +class BatchHistSynchronizer: public HistSynchronizer { + public: + explicit BatchHistSynchronizer(QuantileHistMaker::Builder* builder): HistSynchronizer(builder) {} + + virtual void SyncHistograms(int starting_index, + int sync_count, + RegTree *p_tree); +}; + +class DistributedHistSynchronizer: public HistSynchronizer { + public: + explicit DistributedHistSynchronizer(QuantileHistMaker::Builder* builder): + HistSynchronizer(builder) {} + + virtual void SyncHistograms(int starting_index, + int sync_count, + RegTree *p_tree); + void ParallelSubtractionHist(const common::BlockedSpace2d& space, + const std::vector& nodes, + const RegTree * p_tree); +}; + +class HistRowsAdder { + public: + explicit HistRowsAdder(QuantileHistMaker::Builder* builder) : builder_(builder) {} + virtual void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree) = 0; + + protected: + QuantileHistMaker::Builder* builder_; +}; + +class BatchHistRowsAdder: public HistRowsAdder { + public: + explicit BatchHistRowsAdder(QuantileHistMaker::Builder* builder) : HistRowsAdder(builder) {} + + void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree); +}; + +class DistributedHistRowsAdder: public HistRowsAdder { + public: + explicit DistributedHistRowsAdder(QuantileHistMaker::Builder* builder) : HistRowsAdder(builder) {} + + void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree); +}; + + } // namespace tree } // namespace xgboost From 605008f5daada102eef827c2c508713b8e12598d Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Mon, 11 May 2020 22:10:52 +0300 Subject: [PATCH 12/13] tests were added and found bug was fixed(min->max) --- src/tree/updater_quantile_hist.cc | 2 +- src/tree/updater_quantile_hist.h | 18 ++- tests/cpp/tree/test_quantile_hist.cc | 209 ++++++++++++++++++++++++++- 3 files changed, 220 insertions(+), 9 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 1b27e653fb09..5513703dc20d 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -234,7 +234,7 @@ void DistributedHistRowsAdder::AddHistRows(int *starting_index, int *sync_count, builder_->hist_local_worker_.AddHistRow(nid); } } - (*sync_count) = std::min(1, n_left); + (*sync_count) = std::max(1, n_left); builder_->builder_monitor_.Stop("AddHistRows"); } diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index e4ae31e28fb9..a67abbcae949 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -400,6 +400,9 @@ class HistSynchronizer { virtual void SyncHistograms(int starting_index, int sync_count, RegTree *p_tree) = 0; + virtual ~HistSynchronizer() { + builder_ = nullptr; + } protected: QuantileHistMaker::Builder* builder_; @@ -409,9 +412,9 @@ class BatchHistSynchronizer: public HistSynchronizer { public: explicit BatchHistSynchronizer(QuantileHistMaker::Builder* builder): HistSynchronizer(builder) {} - virtual void SyncHistograms(int starting_index, + void SyncHistograms(int starting_index, int sync_count, - RegTree *p_tree); + RegTree *p_tree) override; }; class DistributedHistSynchronizer: public HistSynchronizer { @@ -419,9 +422,9 @@ class DistributedHistSynchronizer: public HistSynchronizer { explicit DistributedHistSynchronizer(QuantileHistMaker::Builder* builder): HistSynchronizer(builder) {} - virtual void SyncHistograms(int starting_index, + void SyncHistograms(int starting_index, int sync_count, - RegTree *p_tree); + RegTree *p_tree) override; void ParallelSubtractionHist(const common::BlockedSpace2d& space, const std::vector& nodes, const RegTree * p_tree); @@ -431,6 +434,9 @@ class HistRowsAdder { public: explicit HistRowsAdder(QuantileHistMaker::Builder* builder) : builder_(builder) {} virtual void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree) = 0; + virtual ~HistRowsAdder() { + builder_ = nullptr; + } protected: QuantileHistMaker::Builder* builder_; @@ -440,14 +446,14 @@ class BatchHistRowsAdder: public HistRowsAdder { public: explicit BatchHistRowsAdder(QuantileHistMaker::Builder* builder) : HistRowsAdder(builder) {} - void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree); + void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree) override; }; class DistributedHistRowsAdder: public HistRowsAdder { public: explicit DistributedHistRowsAdder(QuantileHistMaker::Builder* builder) : HistRowsAdder(builder) {} - void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree); + void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree) override; }; diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index 2c16f68fcd96..77ed64fc5077 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -29,7 +29,8 @@ class QuantileHistMock : public QuantileHistMaker { std::unique_ptr spliteval, FeatureInteractionConstraintHost int_constraint, DMatrix const* fmat) - : RealImpl(param, std::move(pruner), std::move(spliteval), std::move(int_constraint), fmat) {} + : RealImpl(param, std::move(pruner), std::move(spliteval), + std::move(int_constraint), fmat) {} public: void TestInitData(const GHistIndexMatrix& gmat, @@ -120,6 +121,147 @@ class QuantileHistMock : public QuantileHistMaker { omp_set_num_threads(nthreads); } + void TestAddHistRows(const GHistIndexMatrix& gmat, + const std::vector& gpair, + DMatrix* p_fmat, + RegTree* tree) { + RealImpl::InitData(gmat, gpair, *p_fmat, *tree); + + int starting_index = std::numeric_limits::max(); + int sync_count = 0; + nodes_for_explicit_hist_build_.clear(); + nodes_for_subtraction_trick_.clear(); + + tree->ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + tree->ExpandNode((*tree)[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + tree->ExpandNode((*tree)[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + nodes_for_explicit_hist_build_.emplace_back(3, 4, tree->GetDepth(3), 0.0f, 0); + nodes_for_explicit_hist_build_.emplace_back(4, 3, tree->GetDepth(4), 0.0f, 0); + nodes_for_subtraction_trick_.emplace_back(5, 6, tree->GetDepth(5), 0.0f, 0); + nodes_for_subtraction_trick_.emplace_back(6, 5, tree->GetDepth(6), 0.0f, 0); + + hist_rows_adder_->AddHistRows(&starting_index, &sync_count, tree); + ASSERT_EQ(sync_count, 2); + ASSERT_EQ(starting_index, 3); + + for (const ExpandEntry& node : nodes_for_explicit_hist_build_) { + ASSERT_EQ(hist_.RowExists(node.nid), true); + } + for (const ExpandEntry& node : nodes_for_subtraction_trick_) { + ASSERT_EQ(hist_.RowExists(node.nid), true); + } + } + + + void TestSyncHistograms(const GHistIndexMatrix& gmat, + const std::vector& gpair, + DMatrix* p_fmat, + RegTree* tree) { + // init + RealImpl::InitData(gmat, gpair, *p_fmat, *tree); + + int starting_index = std::numeric_limits::max(); + int sync_count = 0; + nodes_for_explicit_hist_build_.clear(); + nodes_for_subtraction_trick_.clear(); + // level 0 + nodes_for_explicit_hist_build_.emplace_back(0, -1, tree->GetDepth(0), 0.0f, 0); + hist_rows_adder_->AddHistRows(&starting_index, &sync_count, tree); + tree->ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + + nodes_for_explicit_hist_build_.clear(); + nodes_for_subtraction_trick_.clear(); + // level 1 + nodes_for_explicit_hist_build_.emplace_back((*tree)[0].LeftChild(), (*tree)[0].RightChild(), + tree->GetDepth(1), 0.0f, 0); + nodes_for_subtraction_trick_.emplace_back((*tree)[0].RightChild(), (*tree)[0].LeftChild(), + tree->GetDepth(2), 0.0f, 0); + hist_rows_adder_->AddHistRows(&starting_index, &sync_count, tree); + tree->ExpandNode((*tree)[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + tree->ExpandNode((*tree)[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + + nodes_for_explicit_hist_build_.clear(); + nodes_for_subtraction_trick_.clear(); + // level 2 + nodes_for_explicit_hist_build_.emplace_back(3, 4, tree->GetDepth(3), 0.0f, 0); + nodes_for_subtraction_trick_.emplace_back(4, 3, tree->GetDepth(4), 0.0f, 0); + nodes_for_explicit_hist_build_.emplace_back(5, 6, tree->GetDepth(5), 0.0f, 0); + nodes_for_subtraction_trick_.emplace_back(6, 5, tree->GetDepth(6), 0.0f, 0); + hist_rows_adder_->AddHistRows(&starting_index, &sync_count, tree); + + const size_t n_nodes = nodes_for_explicit_hist_build_.size(); + ASSERT_EQ(n_nodes, 2); + row_set_collection_.AddSplit(0, (*tree)[0].LeftChild(), + (*tree)[0].RightChild(), 4, 4); + row_set_collection_.AddSplit(1, (*tree)[1].LeftChild(), + (*tree)[1].RightChild(), 2, 2); + row_set_collection_.AddSplit(2, (*tree)[2].LeftChild(), + (*tree)[2].RightChild(), 2, 2); + + common::BlockedSpace2d space(n_nodes, [&](size_t node) { + const int32_t nid = nodes_for_explicit_hist_build_[node].nid; + return row_set_collection_[nid].Size(); + }, 256); + + std::vector target_hists(n_nodes); + for (size_t i = 0; i < nodes_for_explicit_hist_build_.size(); ++i) { + const int32_t nid = nodes_for_explicit_hist_build_[i].nid; + target_hists[i] = hist_[nid]; + } + + const size_t nbins = hist_builder_.GetNumBins(); + // set values to specific nodes hist + std::vector n_ids = {1, 2}; + for (size_t i : n_ids) { + auto this_hist = hist_[i]; + using FPType = decltype(tree::GradStats::sum_grad); + FPType* p_hist = reinterpret_cast(this_hist.data()); + for (size_t bin_id = 0; bin_id < 2*nbins; ++bin_id) { + p_hist[bin_id] = 2*bin_id; + } + } + n_ids[0] = 3; + n_ids[1] = 5; + for (size_t i : n_ids) { + auto this_hist = hist_[i]; + using FPType = decltype(tree::GradStats::sum_grad); + FPType* p_hist = reinterpret_cast(this_hist.data()); + for (size_t bin_id = 0; bin_id < 2*nbins; ++bin_id) { + p_hist[bin_id] = bin_id; + } + } + + hist_buffer_.Reset(1, n_nodes, space, target_hists); + // sync hist + hist_synchronizer_->SyncHistograms(starting_index, sync_count, tree); + + auto check_hist = [] (const GHistRow parent, const GHistRow left, + const GHistRow right, size_t begin, size_t end) { + using FPType = decltype(tree::GradStats::sum_grad); + const FPType* p_parent = reinterpret_cast(parent.data()); + const FPType* p_left = reinterpret_cast(left.data()); + const FPType* p_right = reinterpret_cast(right.data()); + for (size_t i = 2 * begin; i < 2 * end; ++i) { + ASSERT_EQ(p_parent[i], p_left[i] + p_right[i]); + } + }; + for (const ExpandEntry& node : nodes_for_explicit_hist_build_) { + auto this_hist = hist_[node.nid]; + const size_t parent_id = (*tree)[node.nid].Parent(); + auto parent_hist = hist_[parent_id]; + auto sibling_hist = hist_[node.sibling_nid]; + + check_hist(parent_hist, this_hist, sibling_hist, 0, nbins); + } + for (const ExpandEntry& node : nodes_for_subtraction_trick_) { + auto this_hist = hist_[node.nid]; + const size_t parent_id = (*tree)[node.nid].Parent(); + auto parent_hist = hist_[parent_id]; + auto sibling_hist = hist_[node.sibling_nid]; + + check_hist(parent_hist, this_hist, sibling_hist, 0, nbins); + } + } void TestBuildHist(int nid, const GHistIndexMatrix& gmat, @@ -324,7 +466,7 @@ class QuantileHistMock : public QuantileHistMaker { public: explicit QuantileHistMock( - const std::vector >& args) : + const std::vector >& args, bool batch = true) : cfg_{args} { QuantileHistMaker::Configure(args); spliteval_->Init(¶m_); @@ -336,6 +478,13 @@ class QuantileHistMock : public QuantileHistMaker { std::unique_ptr(spliteval_->GetHostClone()), int_constraint_, dmat_.get())); + if (batch) { + builder_->SetHistSynchronizer(new BatchHistSynchronizer(builder_.get())); + builder_->SetHistRowsAdder(new BatchHistRowsAdder(builder_.get())); + } else { + builder_->SetHistSynchronizer(new DistributedHistSynchronizer(builder_.get())); + builder_->SetHistRowsAdder(new DistributedHistRowsAdder(builder_.get())); + } } ~QuantileHistMock() override = default; @@ -370,6 +519,34 @@ class QuantileHistMock : public QuantileHistMaker { builder_->TestInitDataSampling(gmat, gpair, dmat_.get(), tree); } + + void TestAddHistRows() { + size_t constexpr kMaxBins = 4; + common::GHistIndexMatrix gmat; + gmat.Init(dmat_.get(), kMaxBins); + + RegTree tree = RegTree(); + tree.param.UpdateAllowUnknown(cfg_); + std::vector gpair = + { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, + {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} }; + builder_->TestAddHistRows(gmat, gpair, dmat_.get(), &tree); + } + + void TestSyncHistograms() { + size_t constexpr kMaxBins = 4; + common::GHistIndexMatrix gmat; + gmat.Init(dmat_.get(), kMaxBins); + + RegTree tree = RegTree(); + tree.param.UpdateAllowUnknown(cfg_); + std::vector gpair = + { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, + {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} }; + builder_->TestSyncHistograms(gmat, gpair, dmat_.get(), &tree); + } + + void TestBuildHist() { RegTree tree = RegTree(); tree.param.UpdateAllowUnknown(cfg_); @@ -412,6 +589,34 @@ TEST(QuantileHist, InitDataSampling) { maker.TestInitDataSampling(); } +TEST(QuantileHist, AddHistRows) { + std::vector> cfg + {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; + QuantileHistMock maker(cfg); + maker.TestAddHistRows(); +} + +TEST(QuantileHist, SyncHistograms) { + std::vector> cfg + {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; + QuantileHistMock maker(cfg); + maker.TestSyncHistograms(); +} + +TEST(QuantileHist, DistributedAddHistRows) { + std::vector> cfg + {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; + QuantileHistMock maker(cfg, false); + maker.TestAddHistRows(); +} + +TEST(QuantileHist, DistributedSyncHistograms) { + std::vector> cfg + {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; + QuantileHistMock maker(cfg, false); + maker.TestSyncHistograms(); +} + TEST(QuantileHist, BuildHist) { // Don't enable feature grouping std::vector> cfg From a9f16f24cd1d9266b4497d56223b34cdf4907d64 Mon Sep 17 00:00:00 2001 From: "SHVETS, KIRILL" Date: Mon, 18 May 2020 15:05:25 +0300 Subject: [PATCH 13/13] comments were applied --- src/tree/updater_quantile_hist.cc | 131 ++++++++++++++------------- src/tree/updater_quantile_hist.h | 49 ++++------ tests/cpp/tree/test_quantile_hist.cc | 18 ++-- 3 files changed, 93 insertions(+), 105 deletions(-) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 5513703dc20d..051a1a44f22c 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -78,11 +78,11 @@ void QuantileHistMaker::Update(HostDeviceVector *gpair, std::unique_ptr(spliteval_->GetHostClone()), int_constraint_, dmat)); if (rabit::IsDistributed()) { - builder_->SetHistSynchronizer(new DistributedHistSynchronizer(builder_.get())); - builder_->SetHistRowsAdder(new DistributedHistRowsAdder(builder_.get())); + builder_->SetHistSynchronizer(new DistributedHistSynchronizer()); + builder_->SetHistRowsAdder(new DistributedHistRowsAdder()); } else { - builder_->SetHistSynchronizer(new BatchHistSynchronizer(builder_.get())); - builder_->SetHistRowsAdder(new BatchHistRowsAdder(builder_.get())); + builder_->SetHistSynchronizer(new BatchHistSynchronizer()); + builder_->SetHistRowsAdder(new BatchHistRowsAdder()); } } for (auto tree : trees) { @@ -104,138 +104,143 @@ bool QuantileHistMaker::UpdatePredictionCache( } } -void BatchHistSynchronizer::SyncHistograms(int starting_index, +void BatchHistSynchronizer::SyncHistograms(QuantileHistMaker::Builder* builder, + int starting_index, int sync_count, RegTree *p_tree) { - builder_->builder_monitor_.Start("SyncHistograms"); - const size_t nbins = builder_->hist_builder_.GetNumBins(); - common::BlockedSpace2d space(builder_->nodes_for_explicit_hist_build_.size(), [&](size_t node) { + builder->builder_monitor_.Start("SyncHistograms"); + const size_t nbins = builder->hist_builder_.GetNumBins(); + common::BlockedSpace2d space(builder->nodes_for_explicit_hist_build_.size(), [&](size_t node) { return nbins; }, 1024); - common::ParallelFor2d(space, builder_->nthread_, [&](size_t node, common::Range1d r) { - const auto entry = builder_->nodes_for_explicit_hist_build_[node]; - auto this_hist = builder_->hist_[entry.nid]; + common::ParallelFor2d(space, builder->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = builder->nodes_for_explicit_hist_build_[node]; + auto this_hist = builder->hist_[entry.nid]; // Merging histograms from each thread into once - builder_->hist_buffer_.ReduceHist(node, r.begin(), r.end()); + builder->hist_buffer_.ReduceHist(node, r.begin(), r.end()); if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { const size_t parent_id = (*p_tree)[entry.nid].Parent(); - auto parent_hist = builder_->hist_[parent_id]; - auto sibling_hist = builder_->hist_[entry.sibling_nid]; + auto parent_hist = builder->hist_[parent_id]; + auto sibling_hist = builder->hist_[entry.sibling_nid]; SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); } }); - builder_->builder_monitor_.Stop("SyncHistograms"); + builder->builder_monitor_.Stop("SyncHistograms"); } -void DistributedHistSynchronizer::SyncHistograms(int starting_index, +void DistributedHistSynchronizer::SyncHistograms(QuantileHistMaker::Builder* builder, + int starting_index, int sync_count, RegTree *p_tree) { - builder_->builder_monitor_.Start("SyncHistograms"); - const size_t nbins = builder_->hist_builder_.GetNumBins(); - common::BlockedSpace2d space(builder_->nodes_for_explicit_hist_build_.size(), [&](size_t node) { + builder->builder_monitor_.Start("SyncHistograms"); + const size_t nbins = builder->hist_builder_.GetNumBins(); + common::BlockedSpace2d space(builder->nodes_for_explicit_hist_build_.size(), [&](size_t node) { return nbins; }, 1024); - common::ParallelFor2d(space, builder_->nthread_, [&](size_t node, common::Range1d r) { - const auto entry = builder_->nodes_for_explicit_hist_build_[node]; - auto this_hist = builder_->hist_[entry.nid]; + common::ParallelFor2d(space, builder->nthread_, [&](size_t node, common::Range1d r) { + const auto entry = builder->nodes_for_explicit_hist_build_[node]; + auto this_hist = builder->hist_[entry.nid]; // Merging histograms from each thread into once - builder_->hist_buffer_.ReduceHist(node, r.begin(), r.end()); + builder->hist_buffer_.ReduceHist(node, r.begin(), r.end()); // Store posible parent node - auto this_local = builder_->hist_local_worker_[entry.nid]; + auto this_local = builder->hist_local_worker_[entry.nid]; CopyHist(this_local, this_hist, r.begin(), r.end()); if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { const size_t parent_id = (*p_tree)[entry.nid].Parent(); - auto parent_hist = builder_->hist_local_worker_[parent_id]; - auto sibling_hist = builder_->hist_[entry.sibling_nid]; + auto parent_hist = builder->hist_local_worker_[parent_id]; + auto sibling_hist = builder->hist_[entry.sibling_nid]; SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); // Store posible parent node - auto sibling_local = builder_->hist_local_worker_[entry.sibling_nid]; + auto sibling_local = builder->hist_local_worker_[entry.sibling_nid]; CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); } }); - builder_->builder_monitor_.Start("SyncHistogramsAllreduce"); - this->builder_->histred_.Allreduce(builder_->hist_[starting_index].data(), - builder_->hist_builder_.GetNumBins() * sync_count); - builder_->builder_monitor_.Stop("SyncHistogramsAllreduce"); + builder->builder_monitor_.Start("SyncHistogramsAllreduce"); + builder->histred_.Allreduce(builder->hist_[starting_index].data(), + builder->hist_builder_.GetNumBins() * sync_count); + builder->builder_monitor_.Stop("SyncHistogramsAllreduce"); - ParallelSubtractionHist(space, builder_->nodes_for_explicit_hist_build_, p_tree); + ParallelSubtractionHist(builder, space, builder->nodes_for_explicit_hist_build_, p_tree); - common::BlockedSpace2d space2(builder_->nodes_for_subtraction_trick_.size(), [&](size_t node) { + common::BlockedSpace2d space2(builder->nodes_for_subtraction_trick_.size(), [&](size_t node) { return nbins; }, 1024); - ParallelSubtractionHist(space2, builder_->nodes_for_subtraction_trick_, p_tree); - builder_->builder_monitor_.Stop("SyncHistograms"); + ParallelSubtractionHist(builder, space2, builder->nodes_for_subtraction_trick_, p_tree); + builder->builder_monitor_.Stop("SyncHistograms"); } -void DistributedHistSynchronizer::ParallelSubtractionHist(const common::BlockedSpace2d& space, +void DistributedHistSynchronizer::ParallelSubtractionHist(QuantileHistMaker::Builder* builder, + const common::BlockedSpace2d& space, const std::vector& nodes, const RegTree * p_tree) { - common::ParallelFor2d(space, builder_->nthread_, [&](size_t node, common::Range1d r) { + common::ParallelFor2d(space, builder->nthread_, [&](size_t node, common::Range1d r) { const auto entry = nodes[node]; if (!((*p_tree)[entry.nid].IsLeftChild())) { - auto this_hist = builder_->hist_[entry.nid]; + auto this_hist = builder->hist_[entry.nid]; if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { - auto parent_hist = builder_->hist_[(*p_tree)[entry.nid].Parent()]; - auto sibling_hist = builder_->hist_[entry.sibling_nid]; + auto parent_hist = builder->hist_[(*p_tree)[entry.nid].Parent()]; + auto sibling_hist = builder->hist_[entry.sibling_nid]; SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); } } }); } -void BatchHistRowsAdder::AddHistRows(int *starting_index, int *sync_count, +void BatchHistRowsAdder::AddHistRows(QuantileHistMaker::Builder* builder, + int *starting_index, int *sync_count, RegTree *p_tree) { - builder_->builder_monitor_.Start("AddHistRows"); + builder->builder_monitor_.Start("AddHistRows"); - for (auto const& entry : builder_->nodes_for_explicit_hist_build_) { + for (auto const& entry : builder->nodes_for_explicit_hist_build_) { int nid = entry.nid; - builder_->hist_.AddHistRow(nid); + builder->hist_.AddHistRow(nid); (*starting_index) = std::min(nid, (*starting_index)); } - (*sync_count) = builder_->nodes_for_explicit_hist_build_.size(); + (*sync_count) = builder->nodes_for_explicit_hist_build_.size(); - for (auto const& node : builder_->nodes_for_subtraction_trick_) { - builder_->hist_.AddHistRow(node.nid); + for (auto const& node : builder->nodes_for_subtraction_trick_) { + builder->hist_.AddHistRow(node.nid); } - builder_->builder_monitor_.Stop("AddHistRows"); + builder->builder_monitor_.Stop("AddHistRows"); } -void DistributedHistRowsAdder::AddHistRows(int *starting_index, int *sync_count, +void DistributedHistRowsAdder::AddHistRows(QuantileHistMaker::Builder* builder, + int *starting_index, int *sync_count, RegTree *p_tree) { - builder_->builder_monitor_.Start("AddHistRows"); - const size_t explicit_size = builder_->nodes_for_explicit_hist_build_.size(); - const size_t subtaction_size = builder_->nodes_for_subtraction_trick_.size(); + builder->builder_monitor_.Start("AddHistRows"); + const size_t explicit_size = builder->nodes_for_explicit_hist_build_.size(); + const size_t subtaction_size = builder->nodes_for_subtraction_trick_.size(); std::vector merged_node_ids(explicit_size + subtaction_size); for (size_t i = 0; i < explicit_size; ++i) { - merged_node_ids[i] = builder_->nodes_for_explicit_hist_build_[i].nid; + merged_node_ids[i] = builder->nodes_for_explicit_hist_build_[i].nid; } for (size_t i = 0; i < subtaction_size; ++i) { merged_node_ids[explicit_size + i] = - builder_->nodes_for_subtraction_trick_[i].nid; + builder->nodes_for_subtraction_trick_[i].nid; } std::sort(merged_node_ids.begin(), merged_node_ids.end()); int n_left = 0; for (auto const& nid : merged_node_ids) { if ((*p_tree)[nid].IsLeftChild()) { - builder_->hist_.AddHistRow(nid); + builder->hist_.AddHistRow(nid); (*starting_index) = std::min(nid, (*starting_index)); n_left++; - builder_->hist_local_worker_.AddHistRow(nid); + builder->hist_local_worker_.AddHistRow(nid); } } for (auto const& nid : merged_node_ids) { if (!((*p_tree)[nid].IsLeftChild())) { - builder_->hist_.AddHistRow(nid); - builder_->hist_local_worker_.AddHistRow(nid); + builder->hist_.AddHistRow(nid); + builder->hist_local_worker_.AddHistRow(nid); } } (*sync_count) = std::max(1, n_left); - builder_->builder_monitor_.Stop("AddHistRows"); + builder->builder_monitor_.Stop("AddHistRows"); } void QuantileHistMaker::Builder::SetHistSynchronizer(HistSynchronizer* sync) { @@ -264,9 +269,9 @@ void QuantileHistMaker::Builder::BuildHistogramsLossGuide( int starting_index = std::numeric_limits::max(); int sync_count = 0; - hist_rows_adder_->AddHistRows(&starting_index, &sync_count, p_tree); + hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree); BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); - hist_synchronizer_->SyncHistograms(starting_index, sync_count, p_tree); + hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree); } void QuantileHistMaker::Builder::BuildLocalHistograms( @@ -443,9 +448,9 @@ void QuantileHistMaker::Builder::ExpandWithDepthWise( std::vector temp_qexpand_depth; SplitSiblings(qexpand_depth_wise_, &nodes_for_explicit_hist_build_, &nodes_for_subtraction_trick_, p_tree); - hist_rows_adder_->AddHistRows(&starting_index, &sync_count, p_tree); + hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree); BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); - hist_synchronizer_->SyncHistograms(starting_index, sync_count, p_tree); + hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree); BuildNodeStats(gmat, p_fmat, p_tree, gpair_h); EvaluateAndApplySplits(gmat, column_matrix, p_tree, &num_leaves, depth, ×tamp, diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index a67abbcae949..9402569a60c2 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -396,64 +396,47 @@ class QuantileHistMaker: public TreeUpdater { class HistSynchronizer { public: - explicit HistSynchronizer(QuantileHistMaker::Builder* builder) : builder_(builder) {} - virtual void SyncHistograms(int starting_index, + virtual void SyncHistograms(QuantileHistMaker::Builder* builder, + int starting_index, int sync_count, RegTree *p_tree) = 0; - virtual ~HistSynchronizer() { - builder_ = nullptr; - } - - protected: - QuantileHistMaker::Builder* builder_; }; class BatchHistSynchronizer: public HistSynchronizer { public: - explicit BatchHistSynchronizer(QuantileHistMaker::Builder* builder): HistSynchronizer(builder) {} - - void SyncHistograms(int starting_index, - int sync_count, - RegTree *p_tree) override; + void SyncHistograms(QuantileHistMaker::Builder* builder, + int starting_index, + int sync_count, + RegTree *p_tree) override; }; class DistributedHistSynchronizer: public HistSynchronizer { public: - explicit DistributedHistSynchronizer(QuantileHistMaker::Builder* builder): - HistSynchronizer(builder) {} + void SyncHistograms(QuantileHistMaker::Builder* builder_, + int starting_index, int sync_count, RegTree *p_tree) override; - void SyncHistograms(int starting_index, - int sync_count, - RegTree *p_tree) override; - void ParallelSubtractionHist(const common::BlockedSpace2d& space, + void ParallelSubtractionHist(QuantileHistMaker::Builder* builder, + const common::BlockedSpace2d& space, const std::vector& nodes, const RegTree * p_tree); }; class HistRowsAdder { public: - explicit HistRowsAdder(QuantileHistMaker::Builder* builder) : builder_(builder) {} - virtual void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree) = 0; - virtual ~HistRowsAdder() { - builder_ = nullptr; - } - - protected: - QuantileHistMaker::Builder* builder_; + virtual void AddHistRows(QuantileHistMaker::Builder* builder, + int *starting_index, int *sync_count, RegTree *p_tree) = 0; }; class BatchHistRowsAdder: public HistRowsAdder { public: - explicit BatchHistRowsAdder(QuantileHistMaker::Builder* builder) : HistRowsAdder(builder) {} - - void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree) override; + void AddHistRows(QuantileHistMaker::Builder* builder, + int *starting_index, int *sync_count, RegTree *p_tree) override; }; class DistributedHistRowsAdder: public HistRowsAdder { public: - explicit DistributedHistRowsAdder(QuantileHistMaker::Builder* builder) : HistRowsAdder(builder) {} - - void AddHistRows(int *starting_index, int *sync_count, RegTree *p_tree) override; + void AddHistRows(QuantileHistMaker::Builder* builder, + int *starting_index, int *sync_count, RegTree *p_tree) override; }; diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index 77ed64fc5077..3e6131b49ac1 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -140,7 +140,7 @@ class QuantileHistMock : public QuantileHistMaker { nodes_for_subtraction_trick_.emplace_back(5, 6, tree->GetDepth(5), 0.0f, 0); nodes_for_subtraction_trick_.emplace_back(6, 5, tree->GetDepth(6), 0.0f, 0); - hist_rows_adder_->AddHistRows(&starting_index, &sync_count, tree); + hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); ASSERT_EQ(sync_count, 2); ASSERT_EQ(starting_index, 3); @@ -166,7 +166,7 @@ class QuantileHistMock : public QuantileHistMaker { nodes_for_subtraction_trick_.clear(); // level 0 nodes_for_explicit_hist_build_.emplace_back(0, -1, tree->GetDepth(0), 0.0f, 0); - hist_rows_adder_->AddHistRows(&starting_index, &sync_count, tree); + hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); tree->ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0); nodes_for_explicit_hist_build_.clear(); @@ -176,7 +176,7 @@ class QuantileHistMock : public QuantileHistMaker { tree->GetDepth(1), 0.0f, 0); nodes_for_subtraction_trick_.emplace_back((*tree)[0].RightChild(), (*tree)[0].LeftChild(), tree->GetDepth(2), 0.0f, 0); - hist_rows_adder_->AddHistRows(&starting_index, &sync_count, tree); + hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); tree->ExpandNode((*tree)[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); tree->ExpandNode((*tree)[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); @@ -187,7 +187,7 @@ class QuantileHistMock : public QuantileHistMaker { nodes_for_subtraction_trick_.emplace_back(4, 3, tree->GetDepth(4), 0.0f, 0); nodes_for_explicit_hist_build_.emplace_back(5, 6, tree->GetDepth(5), 0.0f, 0); nodes_for_subtraction_trick_.emplace_back(6, 5, tree->GetDepth(6), 0.0f, 0); - hist_rows_adder_->AddHistRows(&starting_index, &sync_count, tree); + hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); const size_t n_nodes = nodes_for_explicit_hist_build_.size(); ASSERT_EQ(n_nodes, 2); @@ -233,7 +233,7 @@ class QuantileHistMock : public QuantileHistMaker { hist_buffer_.Reset(1, n_nodes, space, target_hists); // sync hist - hist_synchronizer_->SyncHistograms(starting_index, sync_count, tree); + hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, tree); auto check_hist = [] (const GHistRow parent, const GHistRow left, const GHistRow right, size_t begin, size_t end) { @@ -479,11 +479,11 @@ class QuantileHistMock : public QuantileHistMaker { int_constraint_, dmat_.get())); if (batch) { - builder_->SetHistSynchronizer(new BatchHistSynchronizer(builder_.get())); - builder_->SetHistRowsAdder(new BatchHistRowsAdder(builder_.get())); + builder_->SetHistSynchronizer(new BatchHistSynchronizer()); + builder_->SetHistRowsAdder(new BatchHistRowsAdder()); } else { - builder_->SetHistSynchronizer(new DistributedHistSynchronizer(builder_.get())); - builder_->SetHistRowsAdder(new DistributedHistRowsAdder(builder_.get())); + builder_->SetHistSynchronizer(new DistributedHistSynchronizer()); + builder_->SetHistRowsAdder(new DistributedHistRowsAdder()); } } ~QuantileHistMock() override = default;