Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@

//=== Regressions

== {es} version 7.10.0

=== Bug Fixes

* Fix progress reporting when resuming after final training has completed for classification
and regression. Previously, progress for final training was shown stuck at zero. (See {ml-pull}1443[#1443].)

== {es} version 7.9.0

=== New Features
Expand Down
3 changes: 3 additions & 0 deletions include/maths/CBoostedTreeImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,9 @@ class MATHS_EXPORT CBoostedTreeImpl final {
//! Start monitoring the final model training.
//!
//! This starts a new progress monitored task for final training on the
//! instrumentation and resets the training progress to a loop over the
//! maximum number of trees.
void startProgressMonitoringFinalTrain();

//! Skip monitoring the final model training.
//!
//! The final training task is still started on the instrumentation, but
//! no loop progress is set up for it. This is called by train on the branch
//! which does not start final training monitoring (presumably because final
//! training already completed before a resume - confirm against train).
void skipProgressMonitoringFinalTrain();

//! Record the training state using the \p recordTrainState callback function.
void recordState(const TTrainingStateCallback& recordTrainState) const;

Expand Down
13 changes: 10 additions & 3 deletions lib/maths/CBoostedTreeImpl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,8 @@ void CBoostedTreeImpl::train(core::CDataFrame& frame,

core::CProgramCounters::counter(counter_t::E_DFTPMTrainedForestNumberTrees) =
m_BestForest.size();
} else {
this->skipProgressMonitoringFinalTrain();
}

this->computeClassificationWeights(frame);
Expand Down Expand Up @@ -346,9 +348,9 @@ std::size_t CBoostedTreeImpl::estimateMemoryUsage(std::size_t numberRows,
std::size_t dataTypeMemoryUsage{maximumNumberFeatures * sizeof(CDataFrameUtils::SDataType)};
std::size_t featureSampleProbabilities{maximumNumberFeatures * sizeof(double)};
// Assuming either many or few missing rows, we get good compression of the bit
// vector. Specifically, we'll assume the average run length is 256 for which
// we get a constant 4 * 8 / 256.
std::size_t missingFeatureMaskMemoryUsage{32 * numberColumns * numberRows / 256};
// vector. Specifically, we'll assume the average run length is 64 for which
// we get a constant 8 / 64.
std::size_t missingFeatureMaskMemoryUsage{8 * numberColumns * numberRows / 64};
std::size_t trainTestMaskMemoryUsage{
2 * static_cast<std::size_t>(std::ceil(std::log2(static_cast<double>(m_NumberFolds)))) *
numberRows};
Expand Down Expand Up @@ -1347,13 +1349,18 @@ void CBoostedTreeImpl::startProgressMonitoringFineTuneHyperparameters() {
}

void CBoostedTreeImpl::startProgressMonitoringFinalTrain() {
    // The final model is trained on more data, so its progress is reported
    // as a task of its own rather than as part of the preceding ones.
    m_Instrumentation->startNewProgressMonitoredTask(CBoostedTreeFactory::FINAL_TRAINING);

    // Fetch the callback only after the new task has been started, then reset
    // the loop progress to range over the maximum number of trees. The trailing
    // 1.0 and 1024 are forwarded unchanged to core::CLoopProgress (presumably
    // the progress scale and the number of steps - confirm against that class).
    auto recordProgress = m_Instrumentation->progressCallback();
    m_TrainingProgress = core::CLoopProgress{m_MaximumNumberTrees, recordProgress, 1.0, 1024};
}

void CBoostedTreeImpl::skipProgressMonitoringFinalTrain() {
    // Start the final training task on the instrumentation without setting
    // up any loop progress for it: m_TrainingProgress is deliberately left
    // untouched here, unlike in startProgressMonitoringFinalTrain.
    (*m_Instrumentation).startNewProgressMonitoredTask(CBoostedTreeFactory::FINAL_TRAINING);
}

namespace {
const std::string VERSION_7_8_TAG{"7.8"};
const TStrVec SUPPORTED_VERSIONS{VERSION_7_8_TAG};
Expand Down