Skip to content

Commit 54108e4

Browse files
authored
[7.x][ML] Fixing memory_status output on fatal error (#1438)
When the job fails because the memory limit has been exceeded, it fails to emit the final memory_status message. This PR fixes this. There was a typo in the "hard_limit" tag which I fixed. Also, I tweaked the parameters of the unit test CDataFrameAnalyzerTrainingTest.testMemoryLimitHandling to reduce runtime. Additionally, I took the new messaging from #1428 and added it here to avoid merge conflicts.
1 parent 1107a8c commit 54108e4

File tree

4 files changed

+20
-12
lines changed

4 files changed

+20
-12
lines changed

lib/api/CDataFrameAnalysisInstrumentation.cc

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ const std::string HYPERPARAMETERS_TAG{"hyperparameters"};
4040
const std::string MEMORY_REESTIMATE_TAG{"memory_reestimate_bytes"};
4141
const std::string ITERATION_TAG{"iteration"};
4242
const std::string JOB_ID_TAG{"job_id"};
43-
const std::string MEMORY_STATUS_HARD_LIMIT_TAG{"hard-limit"};
43+
const std::string MEMORY_STATUS_HARD_LIMIT_TAG{"hard_limit"};
4444
const std::string MEMORY_STATUS_OK_TAG{"ok"};
4545
const std::string MEMORY_STATUS_TAG{"status"};
4646
const std::string MEMORY_TYPE_TAG{"analytics_memory_usage"};
@@ -190,10 +190,12 @@ void CDataFrameAnalysisInstrumentation::monitor(CDataFrameAnalysisInstrumentatio
190190
instrumentation.memoryReestimate(static_cast<std::int64_t>(memoryReestimateBytes));
191191
instrumentation.memoryStatus(E_HardLimit);
192192
instrumentation.flush();
193-
HANDLE_FATAL(<< "Input error: required memory " << bytesToString(memory)
194-
<< " exceeds the memory limit " << bytesToString(memoryLimit)
195-
<< ". Please force-stop the analysis job, increase the limit to at least "
196-
<< bytesToString(memoryReestimateBytes) << " and restart.")
193+
writer.flush();
194+
LOG_INFO(<< "Required memory " << memory << " exceeds the memory limit " << memoryLimit
195+
<< ". New estimated limit is " << memoryReestimateBytes << ".");
196+
HANDLE_FATAL(<< "Input error: memory limit [" << bytesToString(memoryLimit)
197+
<< "] has been exceeded. Please force stop the job, increase to new estimated limit ["
198+
<< bytesToString(memoryReestimateBytes) << "] and restart.")
197199
}
198200

199201
wait = std::min(2 * wait, 1024);

lib/api/unittest/CDataFrameAnalyzerOutlierTest.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,7 @@ BOOST_AUTO_TEST_CASE(testErrors) {
530530
BOOST_TEST_REQUIRE(errors.size() > 0);
531531
bool memoryLimitExceed{false};
532532
for (const auto& error : errors) {
533-
if (error.find("Input error: required memory") != std::string::npos) {
533+
if (error.find("Input error: memory limit") != std::string::npos) {
534534
memoryLimitExceed = true;
535535
break;
536536
}
@@ -549,7 +549,7 @@ BOOST_AUTO_TEST_CASE(testErrors) {
549549
std::string status{result["analytics_memory_usage"]["status"].GetString()};
550550
if (status == "ok") {
551551
memoryStatusOk = true;
552-
} else if (status == "hard-limit") {
552+
} else if (status == "hard_limit") {
553553
memoryStatusHardLimit = true;
554554
if (result["analytics_memory_usage"].HasMember("memory_reestimate_bytes") &&
555555
result["analytics_memory_usage"]["memory_reestimate_bytes"]

lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -378,23 +378,28 @@ BOOST_AUTO_TEST_CASE(testMemoryLimitHandling) {
378378
};
379379

380380
TDoubleVec expectedPredictions;
381+
std::size_t numberSamples{50};
381382

382383
TStrVec fieldNames{"f1", "f2", "f3", "f4", "target", ".", "."};
383384
TStrVec fieldValues{"", "", "", "", "", "0", ""};
384385
api::CDataFrameAnalyzer analyzer{
385-
test::CDataFrameAnalysisSpecificationFactory{}.memoryLimit(1000).predictionSpec(
386-
test::CDataFrameAnalysisSpecificationFactory::regression(), "target"),
386+
test::CDataFrameAnalysisSpecificationFactory{}
387+
.rows(numberSamples)
388+
.predictionMaximumNumberTrees(2)
389+
.memoryLimit(10)
390+
.predicitionNumberRoundsPerHyperparameter(1)
391+
.predictionSpec(test::CDataFrameAnalysisSpecificationFactory::regression(), "target"),
387392
outputWriterFactory};
388393
test::CDataFrameAnalyzerTrainingFactory::addPredictionTestData(
389394
TLossFunctionType::E_MseRegression, fieldNames, fieldValues, analyzer,
390-
expectedPredictions);
395+
expectedPredictions, numberSamples);
391396

392397
analyzer.handleRecord(fieldNames, {"", "", "", "", "", "", "$"});
393398

394399
BOOST_TEST_REQUIRE(errors.size() > 0);
395400
bool memoryLimitExceed{false};
396401
for (const auto& error : errors) {
397-
if (error.find("Input error: required memory") != std::string::npos) {
402+
if (error.find("Input error: memory limit") != std::string::npos) {
398403
memoryLimitExceed = true;
399404
break;
400405
}
@@ -413,7 +418,7 @@ BOOST_AUTO_TEST_CASE(testMemoryLimitHandling) {
413418
std::string status{result["analytics_memory_usage"]["status"].GetString()};
414419
if (status == "ok") {
415420
memoryStatusOk = true;
416-
} else if (status == "hard-limit") {
421+
} else if (status == "hard_limit") {
417422
memoryStatusHardLimit = true;
418423
if (result["analytics_memory_usage"].HasMember("memory_reestimate_bytes") &&
419424
result["analytics_memory_usage"]["memory_reestimate_bytes"].GetInt() > 0) {

lib/core/CRapidJsonConcurrentLineWriter.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ CRapidJsonConcurrentLineWriter::CRapidJsonConcurrentLineWriter(CJsonOutputStream
1515
}
1616

1717
CRapidJsonConcurrentLineWriter::~CRapidJsonConcurrentLineWriter() {
18+
this->flush();
1819
m_OutputStreamWrapper.releaseBuffer(*this, m_StringBuffer);
1920
}
2021

0 commit comments

Comments
 (0)