From 462023dd0f6f6a04e75e8fde213bd774390a9578 Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Thu, 1 Aug 2019 09:26:52 +0200 Subject: [PATCH 1/6] Introduce a mode in which data_frame_analyzer binary only outputs memory estimations but does not perform any analysis --- bin/data_frame_analyzer/CCmdLineParser.cc | 5 ++ bin/data_frame_analyzer/CCmdLineParser.h | 1 + bin/data_frame_analyzer/Main.cc | 29 ++++++--- include/api/CDataFrameAnalysisRunner.h | 6 ++ include/api/CDataFrameAnalysisSpecification.h | 5 ++ .../CMemoryUsageEstimationResultJsonWriter.h | 50 ++++++++++++++ lib/api/CDataFrameAnalysisRunner.cc | 18 +++++ lib/api/CDataFrameAnalysisSpecification.cc | 6 ++ .../CMemoryUsageEstimationResultJsonWriter.cc | 41 ++++++++++++ lib/api/Makefile.first | 1 + .../unittest/CDataFrameAnalysisRunnerTest.cc | 65 +++++++++++++++++++ .../unittest/CDataFrameAnalysisRunnerTest.h | 1 + ...moryUsageEstimationResultJsonWriterTest.cc | 53 +++++++++++++++ ...emoryUsageEstimationResultJsonWriterTest.h | 18 +++++ lib/api/unittest/Main.cc | 6 +- lib/api/unittest/Makefile | 1 + 16 files changed, 296 insertions(+), 10 deletions(-) create mode 100644 include/api/CMemoryUsageEstimationResultJsonWriter.h create mode 100644 lib/api/CMemoryUsageEstimationResultJsonWriter.cc create mode 100644 lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc create mode 100644 lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.h diff --git a/bin/data_frame_analyzer/CCmdLineParser.cc b/bin/data_frame_analyzer/CCmdLineParser.cc index 8c5468e3e5..c56c36b189 100644 --- a/bin/data_frame_analyzer/CCmdLineParser.cc +++ b/bin/data_frame_analyzer/CCmdLineParser.cc @@ -20,6 +20,7 @@ const std::string CCmdLineParser::DESCRIPTION = "Usage: data_frame_analyzer [opt bool CCmdLineParser::parse(int argc, const char* const* argv, std::string& configFile, + bool& memoryUsageEstimationOnly, std::string& logProperties, std::string& logPipe, bool& lengthEncodedInput, @@ -35,6 +36,7 @@ bool CCmdLineParser::parse(int argc, ("version", "Display version information and exit") ("config", boost::program_options::value(), "The configuration file") + ("memoryUsageEstimationOnly", "Whether to perform memory usage estimation only") ("logProperties", boost::program_options::value(), "Optional logger properties file") ("logPipe", boost::program_options::value(), @@ -66,6 +68,9 @@ bool CCmdLineParser::parse(int argc, if (vm.count("config") > 0) { configFile = vm["config"].as(); } + if (vm.count("memoryUsageEstimationOnly") > 0) { + memoryUsageEstimationOnly = true; + } if (vm.count("logProperties") > 0) { logProperties = vm["logProperties"].as(); } diff --git a/bin/data_frame_analyzer/CCmdLineParser.h b/bin/data_frame_analyzer/CCmdLineParser.h index b097fe166c..7623d0d07b 100644 --- a/bin/data_frame_analyzer/CCmdLineParser.h +++ b/bin/data_frame_analyzer/CCmdLineParser.h @@ -27,6 +27,7 @@ class CCmdLineParser { static bool parse(int argc, const char* const* argv, std::string& configFile, + bool& memoryUsageEstimationOnly, std::string& logProperties, std::string& logPipe, bool& lengthEncodedInput, diff --git a/bin/data_frame_analyzer/Main.cc b/bin/data_frame_analyzer/Main.cc index 90644b1ce2..5fd8c8c7b7 100644 --- a/bin/data_frame_analyzer/Main.cc +++ b/bin/data_frame_analyzer/Main.cc @@ -30,6 +30,7 @@ #include #include #include +#include #include "CCmdLineParser.h" @@ -86,6 +87,7 @@ int main(int argc, char** argv) { // Read command line options std::string configFile; + bool memoryUsageEstimationOnly(false); std::string logProperties; std::string logPipe; bool lengthEncodedInput(false); @@ -94,7 +96,7 @@ int main(int argc, char** argv) { std::string outputFileName; bool isOutputFileNamedPipe(false); if (ml::data_frame_analyzer::CCmdLineParser::parse( - argc, argv, configFile, logProperties, logPipe, lengthEncodedInput, inputFileName, + argc, argv, configFile, memoryUsageEstimationOnly, logProperties, logPipe, lengthEncodedInput, inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe) == false) { return EXIT_FAILURE; } @@ -127,13 +129,6 @@ int main(int argc, char** argv) { } using TInputParserUPtr = std::unique_ptr; - auto inputParser{[lengthEncodedInput, &ioMgr]() -> TInputParserUPtr { - if (lengthEncodedInput) { - return std::make_unique(ioMgr.inputStream()); - } - return std::make_unique( - ioMgr.inputStream(), ml::api::CCsvInputParser::COMMA); - }()}; std::string analysisSpecificationJson; bool couldReadConfigFile; @@ -145,6 +140,17 @@ int main(int argc, char** argv) { auto analysisSpecification = std::make_unique(analysisSpecificationJson); + + if (memoryUsageEstimationOnly) { + const auto result = analysisSpecification->estimateMemoryUsage(); + auto outStream = [&ioMgr]() { + return std::make_unique(ioMgr.outputStream()); + }(); + ml::api::CMemoryUsageEstimationResultJsonWriter jsonWriter(*outStream); + jsonWriter.write(result); + return EXIT_SUCCESS; + } + if (analysisSpecification->numberThreads() > 1) { ml::core::startDefaultAsyncExecutor(analysisSpecification->numberThreads()); } @@ -156,6 +162,13 @@ int main(int argc, char** argv) { CCleanUpOnExit::add(dataFrameAnalyzer.dataFrameDirectory()); + auto inputParser{[lengthEncodedInput, &ioMgr]() -> TInputParserUPtr { + if (lengthEncodedInput) { + return std::make_unique(ioMgr.inputStream()); + } + return std::make_unique( + ioMgr.inputStream(), ml::api::CCsvInputParser::COMMA); + }()}; if (inputParser->readStreamIntoVecs( [&dataFrameAnalyzer](const auto& fieldNames, const auto& fieldValues) { return dataFrameAnalyzer.handleRecord(fieldNames, fieldValues); diff --git a/include/api/CDataFrameAnalysisRunner.h b/include/api/CDataFrameAnalysisRunner.h index c4ef7e73ab..c836baff0a 100644 --- a/include/api/CDataFrameAnalysisRunner.h +++ b/include/api/CDataFrameAnalysisRunner.h @@ -31,6 +31,7 @@ class CRowRef; } namespace api { class CDataFrameAnalysisSpecification; +struct SMemoryUsageEstimationResult; //! \brief Hierarchy for running a specific core::CDataFrame analyses. //! @@ -75,6 +76,11 @@ class API_EXPORT CDataFrameAnalysisRunner { //! number of rows per subset. void computeAndSaveExecutionStrategy(); + //! Estimates memory usage in two cases: one partition (the whole data frame + //! fits in main memory) and maximum tolerable number of partitions (only + //! one partition needs to be loaded to main memory. + SMemoryUsageEstimationResult estimateMemoryUsage() const; + //! Check if the data frame for this analysis should use in or out of core //! storage. bool storeDataFrameInMainMemory() const; diff --git a/include/api/CDataFrameAnalysisSpecification.h b/include/api/CDataFrameAnalysisSpecification.h index 82bdb86332..d113c5de2b 100644 --- a/include/api/CDataFrameAnalysisSpecification.h +++ b/include/api/CDataFrameAnalysisSpecification.h @@ -151,6 +151,11 @@ class API_EXPORT CDataFrameAnalysisSpecification { //! calling thread until the runner has finished. CDataFrameAnalysisRunner* run(core::CDataFrame& frame) const; + //! Estimates memory usage in two cases: one partition (the whole data frame + //! fits in main memory) and maximum tolerable number of partitions (only + //! one partition needs to be loaded to main memory. + SMemoryUsageEstimationResult estimateMemoryUsage() const; + private: void initializeRunner(const rapidjson::Value& jsonAnalysis); diff --git a/include/api/CMemoryUsageEstimationResultJsonWriter.h b/include/api/CMemoryUsageEstimationResultJsonWriter.h new file mode 100644 index 0000000000..a7bc7356d6 --- /dev/null +++ b/include/api/CMemoryUsageEstimationResultJsonWriter.h @@ -0,0 +1,50 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +#ifndef INCLUDED_ml_api_CMemoryUsageEstimationResultJsonWriter_h +#define INCLUDED_ml_api_CMemoryUsageEstimationResultJsonWriter_h + +#include +#include +#include + +#include + +#include + +namespace ml { +namespace api { + +//! Structure to store the memory usage estimation result +struct API_EXPORT SMemoryUsageEstimationResult { + SMemoryUsageEstimationResult(size_t memoryUsageWithOnePartition, size_t memoryUsageWithMaxPartitions); + SMemoryUsageEstimationResult(const SMemoryUsageEstimationResult& result); + + const size_t s_MemoryUsageWithOnePartition; + const size_t s_MemoryUsageWithMaxPartitions; +}; + +//! \brief +//! Write memory usage estimation result in JSON format +//! +//! DESCRIPTION:\n +//! Outputs the memory usage estimation result. +//! +class API_EXPORT CMemoryUsageEstimationResultJsonWriter : private core::CNonCopyable { +public: + //! Constructor that causes output to be written to the specified wrapped stream + CMemoryUsageEstimationResultJsonWriter(core::CJsonOutputStreamWrapper& strmOut); + + //! Writes the given memory usage estimation result in JSON format. + void write(const SMemoryUsageEstimationResult& result); + +private: + //! JSON line writer + core::CRapidJsonConcurrentLineWriter m_Writer; +}; +} +} + +#endif // INCLUDED_ml_api_CMemoryUsageEstimationResultJsonWriter_h diff --git a/lib/api/CDataFrameAnalysisRunner.cc b/lib/api/CDataFrameAnalysisRunner.cc index 5bb3420259..89f74152d5 100644 --- a/lib/api/CDataFrameAnalysisRunner.cc +++ b/lib/api/CDataFrameAnalysisRunner.cc @@ -11,6 +11,7 @@ #include #include +#include #include @@ -36,6 +37,23 @@ CDataFrameAnalysisRunner::~CDataFrameAnalysisRunner() { this->waitToFinish(); } +SMemoryUsageEstimationResult CDataFrameAnalysisRunner::estimateMemoryUsage() const { + std::size_t numberRows{m_Spec.numberRows()}; + std::size_t numberColumns{m_Spec.numberColumns() + this->numberExtraColumns()}; + std::size_t maximumNumberPartitions{ + static_cast(std::sqrt(static_cast(numberRows)) + 0.5)}; + if (maximumNumberPartitions == 0) { + return SMemoryUsageEstimationResult(0, 0); + } + std::size_t memoryUsageWithOnePartition{ + this->estimateMemoryUsage(numberRows, numberRows, numberColumns)}; + std::size_t memoryUsageWithMaxPartitions{ + this->estimateMemoryUsage(numberRows, numberRows / maximumNumberPartitions, numberColumns)}; + return SMemoryUsageEstimationResult( + std::ceil(roundMb(memoryUsageWithOnePartition)), + std::ceil(roundMb(memoryUsageWithMaxPartitions))); +} + void CDataFrameAnalysisRunner::computeAndSaveExecutionStrategy() { std::size_t numberRows{m_Spec.numberRows()}; diff --git a/lib/api/CDataFrameAnalysisSpecification.cc b/lib/api/CDataFrameAnalysisSpecification.cc index 313430e479..528dedd2b8 100644 --- a/lib/api/CDataFrameAnalysisSpecification.cc +++ b/lib/api/CDataFrameAnalysisSpecification.cc @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -183,6 +184,11 @@ CDataFrameAnalysisRunner* CDataFrameAnalysisSpecification::run(core::CDataFrame& return nullptr; } +SMemoryUsageEstimationResult CDataFrameAnalysisSpecification::estimateMemoryUsage() const { + // TODO: What to do if m_Runner is nullptr? + return m_Runner->estimateMemoryUsage(); +} + void CDataFrameAnalysisSpecification::initializeRunner(const rapidjson::Value& jsonAnalysis) { // We pass of the interpretation of the parameters object to the appropriate // analysis runner. diff --git a/lib/api/CMemoryUsageEstimationResultJsonWriter.cc b/lib/api/CMemoryUsageEstimationResultJsonWriter.cc new file mode 100644 index 0000000000..209a7ed0cb --- /dev/null +++ b/lib/api/CMemoryUsageEstimationResultJsonWriter.cc @@ -0,0 +1,41 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +#include + +namespace ml { +namespace api { +namespace { + +// JSON field names +const std::string MEMORY_USAGE_WITH_ONE_PARTITION("memory_usage_with_one_partition"); +const std::string MEMORY_USAGE_WITH_MAX_PARTITIONS("memory_usage_with_max_partitions"); +} + +SMemoryUsageEstimationResult::SMemoryUsageEstimationResult( + std::size_t memoryUsageWithOnePartition, std::size_t memoryUsageWithMaxPartitions): +s_MemoryUsageWithOnePartition(memoryUsageWithOnePartition), s_MemoryUsageWithMaxPartitions(memoryUsageWithMaxPartitions) {} + +SMemoryUsageEstimationResult::SMemoryUsageEstimationResult( + const SMemoryUsageEstimationResult& result): +SMemoryUsageEstimationResult(result.s_MemoryUsageWithOnePartition, result.s_MemoryUsageWithMaxPartitions) {} + +CMemoryUsageEstimationResultJsonWriter::CMemoryUsageEstimationResultJsonWriter(core::CJsonOutputStreamWrapper& strmOut) : m_Writer(strmOut) { + // Don't write any output in the constructor because, the way things work at + // the moment, the output stream might be redirected after construction +} + +void CMemoryUsageEstimationResultJsonWriter::write(const SMemoryUsageEstimationResult& result) { + m_Writer.StartObject(); + m_Writer.Key(MEMORY_USAGE_WITH_ONE_PARTITION); + m_Writer.Uint64(result.s_MemoryUsageWithOnePartition); + m_Writer.Key(MEMORY_USAGE_WITH_MAX_PARTITIONS); + m_Writer.Uint64(result.s_MemoryUsageWithMaxPartitions); + m_Writer.EndObject(); + m_Writer.flush(); +} +} +} diff --git a/lib/api/Makefile.first b/lib/api/Makefile.first index 7745c14070..1a03be3996 100644 --- a/lib/api/Makefile.first +++ b/lib/api/Makefile.first @@ -43,6 +43,7 @@ CInputParser.cc \ CIoManager.cc \ CJsonOutputWriter.cc \ CLengthEncodedInputParser.cc \ +CMemoryUsageEstimationResultJsonWriter.cc \ CModelPlotDataJsonWriter.cc \ CModelSizeStatsJsonWriter.cc \ CModelSnapshotJsonWriter.cc \ diff --git a/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc b/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc index 627bd0a049..1aa8674e04 100644 --- a/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc +++ b/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -121,6 +122,67 @@ void CDataFrameAnalysisRunnerTest::testComputeAndSaveExecutionStrategyDiskUsageF } } +void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage() { + + std::vector errors; + std::mutex errorsMutex; + auto errorHandler = [&errors, &errorsMutex](std::string error) { + std::lock_guard lock{errorsMutex}; + errors.push_back(error); + }; + + core::CLogger::CScopeSetFatalErrorHandler scope{errorHandler}; + api::CDataFrameOutliersRunnerFactory factory; + + // Test estimation for empty data frame + { + errors.clear(); + std::string jsonSpec{api::CDataFrameAnalysisSpecificationJsonWriter::jsonString( + 0, 5, 100000000, 1, {}, true, test::CTestTmpDir::tmpDir(), "", "outlier_detection", "")}; + api::CDataFrameAnalysisSpecification spec{jsonSpec}; + + // Check memory estimation result + api::SMemoryUsageEstimationResult result = spec.estimateMemoryUsage(); + CPPUNIT_ASSERT_EQUAL(0, static_cast(result.s_MemoryUsageWithOnePartition)); + CPPUNIT_ASSERT_EQUAL(0, static_cast(result.s_MemoryUsageWithMaxPartitions)); + + // no error should be registered + CPPUNIT_ASSERT_EQUAL(1, static_cast(errors.size())); + } + + // Test estimation for data frame with 1 row + { + errors.clear(); + std::string jsonSpec{api::CDataFrameAnalysisSpecificationJsonWriter::jsonString( + 1, 5, 100000000, 1, {}, true, test::CTestTmpDir::tmpDir(), "", "outlier_detection", "")}; + api::CDataFrameAnalysisSpecification spec{jsonSpec}; + + // Check memory estimation result + api::SMemoryUsageEstimationResult result = spec.estimateMemoryUsage(); + CPPUNIT_ASSERT_EQUAL(6050, static_cast(result.s_MemoryUsageWithOnePartition)); + CPPUNIT_ASSERT_EQUAL(6050, static_cast(result.s_MemoryUsageWithMaxPartitions)); + + // no error should be registered + CPPUNIT_ASSERT_EQUAL(0, static_cast(errors.size())); + } + + // Test estimation for data frame with 4 rows + { + errors.clear(); + std::string jsonSpec{api::CDataFrameAnalysisSpecificationJsonWriter::jsonString( + 4, 5, 100000000, 1, {}, true, test::CTestTmpDir::tmpDir(), "", "outlier_detection", "")}; + api::CDataFrameAnalysisSpecification spec{jsonSpec}; + + // Check memory estimation result + api::SMemoryUsageEstimationResult result = spec.estimateMemoryUsage(); + CPPUNIT_ASSERT_EQUAL(9104, static_cast(result.s_MemoryUsageWithOnePartition)); + CPPUNIT_ASSERT_EQUAL(8528, static_cast(result.s_MemoryUsageWithMaxPartitions)); + + // no error should be registered + CPPUNIT_ASSERT_EQUAL(0, static_cast(errors.size())); + } +} + CppUnit::Test* CDataFrameAnalysisRunnerTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CDataFrameAnalysisRunnerTest"); @@ -130,6 +192,9 @@ CppUnit::Test* CDataFrameAnalysisRunnerTest::suite() { suiteOfTests->addTest(new CppUnit::TestCaller( "CDataFrameAnalysisRunnerTest::testComputeAndSaveExecutionStrategyDiskUsageFlag", &CDataFrameAnalysisRunnerTest::testComputeAndSaveExecutionStrategyDiskUsageFlag)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage", + &CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage)); return suiteOfTests; } diff --git a/lib/api/unittest/CDataFrameAnalysisRunnerTest.h b/lib/api/unittest/CDataFrameAnalysisRunnerTest.h index c2f960c105..20aa2c5f09 100644 --- a/lib/api/unittest/CDataFrameAnalysisRunnerTest.h +++ b/lib/api/unittest/CDataFrameAnalysisRunnerTest.h @@ -13,6 +13,7 @@ class CDataFrameAnalysisRunnerTest : public CppUnit::TestFixture { public: void testComputeExecutionStrategyForOutliers(); void testComputeAndSaveExecutionStrategyDiskUsageFlag(); + void testEstimateMemoryUsage(); static CppUnit::Test* suite(); diff --git a/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc b/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc new file mode 100644 index 0000000000..4194f67e4d --- /dev/null +++ b/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc @@ -0,0 +1,53 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +#include "CMemoryUsageEstimationResultJsonWriterTest.h" + +#include +#include + +#include + +#include + +#include + +#include + +using namespace ml; +using namespace api; + +CppUnit::Test* CMemoryUsageEstimationResultJsonWriterTest::suite() { + CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMemoryUsageEstimationResultJsonWriterTest"); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CMemoryUsageEstimationResultJsonWriterTest::testWrite", &CMemoryUsageEstimationResultJsonWriterTest::testWrite)); + return suiteOfTests; +} + +void CMemoryUsageEstimationResultJsonWriterTest::testWrite() { + std::ostringstream sstream; + + // The output writer won't close the JSON structures until is is destroyed + { + core::CJsonOutputStreamWrapper wrappedOutStream(sstream); + CMemoryUsageEstimationResultJsonWriter writer(wrappedOutStream); + SMemoryUsageEstimationResult result(2000, 1000); + writer.write(result); + } + + rapidjson::Document arrayDoc; + arrayDoc.Parse(sstream.str().c_str()); + + CPPUNIT_ASSERT(arrayDoc.IsArray()); + CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), arrayDoc.Size()); + + const rapidjson::Value& object = arrayDoc[rapidjson::SizeType(0)]; + CPPUNIT_ASSERT(object.IsObject()); + + CPPUNIT_ASSERT(object.HasMember("memory_usage_with_one_partition")); + CPPUNIT_ASSERT_EQUAL(int64_t(2000), object["memory_usage_with_one_partition"].GetInt64()); + CPPUNIT_ASSERT(object.HasMember("memory_usage_with_max_partitions")); + CPPUNIT_ASSERT_EQUAL(int64_t(1000), object["memory_usage_with_max_partitions"].GetInt64()); +} diff --git a/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.h b/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.h new file mode 100644 index 0000000000..def23a83c3 --- /dev/null +++ b/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.h @@ -0,0 +1,18 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +#ifndef INCLUDED_CMemoryUsageEstimationResultJsonWriterTest_h +#define INCLUDED_CMemoryUsageEstimationResultJsonWriterTest_h + +#include + +class CMemoryUsageEstimationResultJsonWriterTest : public CppUnit::TestFixture { +public: + void testWrite(); + + static CppUnit::Test* suite(); +}; + +#endif // INCLUDED_CMemoryUsageEstimationResultJsonWriterTest_h diff --git a/lib/api/unittest/Main.cc b/lib/api/unittest/Main.cc index 15b52853b1..46ba1a1836 100644 --- a/lib/api/unittest/Main.cc +++ b/lib/api/unittest/Main.cc @@ -22,6 +22,7 @@ #include "CIoManagerTest.h" #include "CJsonOutputWriterTest.h" #include "CLengthEncodedInputParserTest.h" +#include "CMemoryUsageEstimationResultJsonWriterTest.h" #include "CModelPlotDataJsonWriterTest.h" #include "CModelSnapshotJsonWriterTest.h" #include "CMultiFileDataAdderTest.h" @@ -57,11 +58,12 @@ int main(int argc, const char** argv) { runner.addTest(CIoManagerTest::suite()); runner.addTest(CJsonOutputWriterTest::suite()); runner.addTest(CLengthEncodedInputParserTest::suite()); - runner.addTest(CNdJsonInputParserTest::suite()); - runner.addTest(CNdJsonOutputWriterTest::suite()); + runner.addTest(CMemoryUsageEstimationResultJsonWriterTest::suite()); runner.addTest(CModelPlotDataJsonWriterTest::suite()); runner.addTest(CModelSnapshotJsonWriterTest::suite()); runner.addTest(CMultiFileDataAdderTest::suite()); + runner.addTest(CNdJsonInputParserTest::suite()); + runner.addTest(CNdJsonOutputWriterTest::suite()); runner.addTest(COutputChainerTest::suite()); runner.addTest(CPersistenceManagerTest::suite()); runner.addTest(CRestorePreviousStateTest::suite()); diff --git a/lib/api/unittest/Makefile b/lib/api/unittest/Makefile index 761b67c02c..24290e5658 100644 --- a/lib/api/unittest/Makefile +++ b/lib/api/unittest/Makefile @@ -37,6 +37,7 @@ SRCS=\ CIoManagerTest.cc \ CJsonOutputWriterTest.cc \ CLengthEncodedInputParserTest.cc \ + CMemoryUsageEstimationResultJsonWriterTest.cc \ CMockDataAdder.cc \ CMockDataProcessor.cc \ CMockSearcher.cc \ From c57f57b6f4103c411a67ecf1ddc329595d660f37 Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Wed, 7 Aug 2019 09:59:55 +0200 Subject: [PATCH 2/6] Apply review comments --- bin/data_frame_analyzer/Main.cc | 5 +- include/api/CDataFrameAnalysisRunner.h | 4 +- include/api/CDataFrameAnalysisSpecification.h | 2 +- .../CMemoryUsageEstimationResultJsonWriter.h | 12 +-- lib/api/CDataFrameAnalysisRunner.cc | 37 ++++---- lib/api/CDataFrameAnalysisSpecification.cc | 4 +- .../CMemoryUsageEstimationResultJsonWriter.cc | 26 ++--- .../unittest/CDataFrameAnalysisRunnerTest.cc | 95 +++++++++++-------- .../unittest/CDataFrameAnalysisRunnerTest.h | 6 +- ...moryUsageEstimationResultJsonWriterTest.cc | 14 +-- 10 files changed, 106 insertions(+), 99 deletions(-) diff --git a/bin/data_frame_analyzer/Main.cc b/bin/data_frame_analyzer/Main.cc index 5fd8c8c7b7..b0a8cb69fe 100644 --- a/bin/data_frame_analyzer/Main.cc +++ b/bin/data_frame_analyzer/Main.cc @@ -142,12 +142,11 @@ int main(int argc, char** argv) { std::make_unique(analysisSpecificationJson); if (memoryUsageEstimationOnly) { - const auto result = analysisSpecification->estimateMemoryUsage(); auto outStream = [&ioMgr]() { return std::make_unique(ioMgr.outputStream()); }(); - ml::api::CMemoryUsageEstimationResultJsonWriter jsonWriter(*outStream); - jsonWriter.write(result); + ml::api::CMemoryUsageEstimationResultJsonWriter writer(*outStream); + analysisSpecification->estimateMemoryUsage(writer); return EXIT_SUCCESS; } diff --git a/include/api/CDataFrameAnalysisRunner.h b/include/api/CDataFrameAnalysisRunner.h index c836baff0a..8aa84d2a49 100644 --- a/include/api/CDataFrameAnalysisRunner.h +++ b/include/api/CDataFrameAnalysisRunner.h @@ -31,7 +31,7 @@ class CRowRef; } namespace api { class CDataFrameAnalysisSpecification; -struct SMemoryUsageEstimationResult; +class CMemoryUsageEstimationResultJsonWriter; //! \brief Hierarchy for running a specific core::CDataFrame analyses. //! @@ -79,7 +79,7 @@ class API_EXPORT CDataFrameAnalysisRunner { //! Estimates memory usage in two cases: one partition (the whole data frame //! fits in main memory) and maximum tolerable number of partitions (only //! one partition needs to be loaded to main memory. - SMemoryUsageEstimationResult estimateMemoryUsage() const; + void estimateMemoryUsage(CMemoryUsageEstimationResultJsonWriter& writer) const; //! Check if the data frame for this analysis should use in or out of core //! storage. diff --git a/include/api/CDataFrameAnalysisSpecification.h b/include/api/CDataFrameAnalysisSpecification.h index d113c5de2b..b2e2c85f3c 100644 --- a/include/api/CDataFrameAnalysisSpecification.h +++ b/include/api/CDataFrameAnalysisSpecification.h @@ -154,7 +154,7 @@ class API_EXPORT CDataFrameAnalysisSpecification { //! Estimates memory usage in two cases: one partition (the whole data frame //! fits in main memory) and maximum tolerable number of partitions (only //! one partition needs to be loaded to main memory. - SMemoryUsageEstimationResult estimateMemoryUsage() const; + void estimateMemoryUsage(CMemoryUsageEstimationResultJsonWriter& resultWriter) const; private: void initializeRunner(const rapidjson::Value& jsonAnalysis); diff --git a/include/api/CMemoryUsageEstimationResultJsonWriter.h b/include/api/CMemoryUsageEstimationResultJsonWriter.h index a7bc7356d6..82cd2b3160 100644 --- a/include/api/CMemoryUsageEstimationResultJsonWriter.h +++ b/include/api/CMemoryUsageEstimationResultJsonWriter.h @@ -17,15 +17,6 @@ namespace ml { namespace api { -//! Structure to store the memory usage estimation result -struct API_EXPORT SMemoryUsageEstimationResult { - SMemoryUsageEstimationResult(size_t memoryUsageWithOnePartition, size_t memoryUsageWithMaxPartitions); - SMemoryUsageEstimationResult(const SMemoryUsageEstimationResult& result); - - const size_t s_MemoryUsageWithOnePartition; - const size_t s_MemoryUsageWithMaxPartitions; -}; - //! \brief //! Write memory usage estimation result in JSON format //! @@ -38,7 +29,8 @@ class API_EXPORT CMemoryUsageEstimationResultJsonWriter : private core::CNonCopy CMemoryUsageEstimationResultJsonWriter(core::CJsonOutputStreamWrapper& strmOut); //! Writes the given memory usage estimation result in JSON format. - void write(const SMemoryUsageEstimationResult& result); + void write(size_t expectedMemoryUsageWithOnePartition, + size_t expectedMemoryUsageWithMaxPartitions); private: //! JSON line writer diff --git a/lib/api/CDataFrameAnalysisRunner.cc b/lib/api/CDataFrameAnalysisRunner.cc index 89f74152d5..c1c13de8f7 100644 --- a/lib/api/CDataFrameAnalysisRunner.cc +++ b/lib/api/CDataFrameAnalysisRunner.cc @@ -25,6 +25,13 @@ std::size_t memoryLimitWithSafetyMargin(const CDataFrameAnalysisSpecification& s return static_cast(0.9 * static_cast(spec.memoryLimit()) + 0.5); } +std::size_t maximumNumberPartitions(const CDataFrameAnalysisSpecification& spec) { + // We limit the maximum number of partitions to rows^(1/2) because very + // large numbers of partitions are going to be slow and it is better to tell + // user to allocate more resources for the job in this case. + return static_cast(std::sqrt(static_cast(spec.numberRows())) + 0.5); +} + const std::size_t MAXIMUM_FRACTIONAL_PROGRESS{std::size_t{1} << ((sizeof(std::size_t) - 2) * 8)}; } @@ -37,21 +44,19 @@ CDataFrameAnalysisRunner::~CDataFrameAnalysisRunner() { this->waitToFinish(); } -SMemoryUsageEstimationResult CDataFrameAnalysisRunner::estimateMemoryUsage() const { +void CDataFrameAnalysisRunner::estimateMemoryUsage(CMemoryUsageEstimationResultJsonWriter& writer) const { std::size_t numberRows{m_Spec.numberRows()}; std::size_t numberColumns{m_Spec.numberColumns() + this->numberExtraColumns()}; - std::size_t maximumNumberPartitions{ - static_cast(std::sqrt(static_cast(numberRows)) + 0.5)}; - if (maximumNumberPartitions == 0) { - return SMemoryUsageEstimationResult(0, 0); + std::size_t maxNumberPartitions{maximumNumberPartitions(m_Spec)}; + if (maxNumberPartitions == 0) { + writer.write(0, 0); + return; } - std::size_t memoryUsageWithOnePartition{ + std::size_t expectedMemoryUsageWithOnePartition{ this->estimateMemoryUsage(numberRows, numberRows, numberColumns)}; - std::size_t memoryUsageWithMaxPartitions{ - this->estimateMemoryUsage(numberRows, numberRows / maximumNumberPartitions, numberColumns)}; - return SMemoryUsageEstimationResult( - std::ceil(roundMb(memoryUsageWithOnePartition)), - std::ceil(roundMb(memoryUsageWithMaxPartitions))); + std::size_t expectedMemoryUsageWithMaxPartitions{ + this->estimateMemoryUsage(numberRows, numberRows / maxNumberPartitions, numberColumns)}; + writer.write(expectedMemoryUsageWithOnePartition, expectedMemoryUsageWithMaxPartitions); } void CDataFrameAnalysisRunner::computeAndSaveExecutionStrategy() { @@ -63,16 +68,12 @@ void CDataFrameAnalysisRunner::computeAndSaveExecutionStrategy() { LOG_TRACE(<< "memory limit = " << memoryLimit); // Find the smallest number of partitions such that the size per partition - // is less than the memory limit. We limit this to rows^(1/2) because very - // large numbers of partitions are going to be slow and it is better to tell - // user to allocate more resources for the job in this case. - - std::size_t maximumNumberPartitions{ - static_cast(std::sqrt(static_cast(numberRows)) + 0.5)}; + // is less than the memory limit. + std::size_t maxNumberPartitions{maximumNumberPartitions(m_Spec)}; std::size_t memoryUsage{0}; - for (m_NumberPartitions = 1; m_NumberPartitions < maximumNumberPartitions; + for (m_NumberPartitions = 1; m_NumberPartitions < maxNumberPartitions; ++m_NumberPartitions) { std::size_t partitionNumberRows{numberRows / m_NumberPartitions}; memoryUsage = this->estimateMemoryUsage(numberRows, partitionNumberRows, numberColumns); diff --git a/lib/api/CDataFrameAnalysisSpecification.cc b/lib/api/CDataFrameAnalysisSpecification.cc index 528dedd2b8..f1310a8873 100644 --- a/lib/api/CDataFrameAnalysisSpecification.cc +++ b/lib/api/CDataFrameAnalysisSpecification.cc @@ -184,9 +184,9 @@ CDataFrameAnalysisRunner* CDataFrameAnalysisSpecification::run(core::CDataFrame& return nullptr; } -SMemoryUsageEstimationResult CDataFrameAnalysisSpecification::estimateMemoryUsage() const { +void CDataFrameAnalysisSpecification::estimateMemoryUsage(CMemoryUsageEstimationResultJsonWriter& resultWriter) const { // TODO: What to do if m_Runner is nullptr? - return m_Runner->estimateMemoryUsage(); + m_Runner->estimateMemoryUsage(resultWriter); } void CDataFrameAnalysisSpecification::initializeRunner(const rapidjson::Value& jsonAnalysis) { diff --git a/lib/api/CMemoryUsageEstimationResultJsonWriter.cc b/lib/api/CMemoryUsageEstimationResultJsonWriter.cc index 209a7ed0cb..43e610092f 100644 --- a/lib/api/CMemoryUsageEstimationResultJsonWriter.cc +++ b/lib/api/CMemoryUsageEstimationResultJsonWriter.cc @@ -11,29 +11,23 @@ namespace api { namespace { // JSON field names -const std::string MEMORY_USAGE_WITH_ONE_PARTITION("memory_usage_with_one_partition"); -const std::string MEMORY_USAGE_WITH_MAX_PARTITIONS("memory_usage_with_max_partitions"); +const std::string EXPECTED_MEMORY_USAGE_WITH_ONE_PARTITION("expected_memory_usage_with_one_partition"); +const std::string EXPECTED_MEMORY_USAGE_WITH_MAX_PARTITIONS("expected_memory_usage_with_max_partitions"); } -SMemoryUsageEstimationResult::SMemoryUsageEstimationResult( - std::size_t memoryUsageWithOnePartition, std::size_t memoryUsageWithMaxPartitions): -s_MemoryUsageWithOnePartition(memoryUsageWithOnePartition), s_MemoryUsageWithMaxPartitions(memoryUsageWithMaxPartitions) {} - -SMemoryUsageEstimationResult::SMemoryUsageEstimationResult( - const SMemoryUsageEstimationResult& result): -SMemoryUsageEstimationResult(result.s_MemoryUsageWithOnePartition, result.s_MemoryUsageWithMaxPartitions) {} - -CMemoryUsageEstimationResultJsonWriter::CMemoryUsageEstimationResultJsonWriter(core::CJsonOutputStreamWrapper& strmOut) : m_Writer(strmOut) { +CMemoryUsageEstimationResultJsonWriter::CMemoryUsageEstimationResultJsonWriter( + core::CJsonOutputStreamWrapper& strmOut) : m_Writer(strmOut) { // Don't write any output in the constructor because, the way things work at // the moment, the output stream might be redirected after construction } -void CMemoryUsageEstimationResultJsonWriter::write(const SMemoryUsageEstimationResult& result) { +void CMemoryUsageEstimationResultJsonWriter::write(std::size_t expectedMemoryUsageWithOnePartition, + std::size_t expectedMemoryUsageWithMaxPartitions) { m_Writer.StartObject(); - m_Writer.Key(MEMORY_USAGE_WITH_ONE_PARTITION); - m_Writer.Uint64(result.s_MemoryUsageWithOnePartition); - m_Writer.Key(MEMORY_USAGE_WITH_MAX_PARTITIONS); - m_Writer.Uint64(result.s_MemoryUsageWithMaxPartitions); + m_Writer.Key(EXPECTED_MEMORY_USAGE_WITH_ONE_PARTITION); + m_Writer.Uint64(expectedMemoryUsageWithOnePartition); + m_Writer.Key(EXPECTED_MEMORY_USAGE_WITH_MAX_PARTITIONS); + m_Writer.Uint64(expectedMemoryUsageWithMaxPartitions); m_Writer.EndObject(); m_Writer.flush(); } diff --git a/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc b/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc index 1aa8674e04..2d7eb6b02e 100644 --- a/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc +++ b/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc @@ -122,8 +122,12 @@ void CDataFrameAnalysisRunnerTest::testComputeAndSaveExecutionStrategyDiskUsageF } } -void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage() { +void testEstimateMemoryUsage(int64_t numberRows, + int64_t expected_expected_memory_usage_with_one_partition, + int64_t expected_expected_memory_usage_with_max_partitions, + int expected_number_errors) { + std::ostringstream sstream; std::vector errors; std::mutex errorsMutex; auto errorHandler = [&errors, &errorsMutex](std::string error) { @@ -132,55 +136,56 @@ void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage() { }; core::CLogger::CScopeSetFatalErrorHandler scope{errorHandler}; - api::CDataFrameOutliersRunnerFactory factory; - // Test estimation for empty data frame + // The output writer won't close the JSON structures until is is destroyed { - errors.clear(); std::string jsonSpec{api::CDataFrameAnalysisSpecificationJsonWriter::jsonString( - 0, 5, 100000000, 1, {}, true, test::CTestTmpDir::tmpDir(), "", "outlier_detection", "")}; + numberRows, 5, 100000000, 1, {}, true, test::CTestTmpDir::tmpDir(), "", "outlier_detection", "")}; api::CDataFrameAnalysisSpecification spec{jsonSpec}; - // Check memory estimation result - api::SMemoryUsageEstimationResult result = spec.estimateMemoryUsage(); - CPPUNIT_ASSERT_EQUAL(0, static_cast(result.s_MemoryUsageWithOnePartition)); - CPPUNIT_ASSERT_EQUAL(0, static_cast(result.s_MemoryUsageWithMaxPartitions)); + core::CJsonOutputStreamWrapper wrappedOutStream(sstream); + api::CMemoryUsageEstimationResultJsonWriter writer(wrappedOutStream); - // no error should be registered - CPPUNIT_ASSERT_EQUAL(1, static_cast(errors.size())); + spec.estimateMemoryUsage(writer); } - // Test estimation for data frame with 1 row - { - errors.clear(); - std::string jsonSpec{api::CDataFrameAnalysisSpecificationJsonWriter::jsonString( - 1, 5, 100000000, 1, {}, true, test::CTestTmpDir::tmpDir(), "", "outlier_detection", "")}; - api::CDataFrameAnalysisSpecification spec{jsonSpec}; + rapidjson::Document arrayDoc; + arrayDoc.Parse(sstream.str().c_str()); - // Check memory estimation result - api::SMemoryUsageEstimationResult result = spec.estimateMemoryUsage(); - CPPUNIT_ASSERT_EQUAL(6050, static_cast(result.s_MemoryUsageWithOnePartition)); - CPPUNIT_ASSERT_EQUAL(6050, static_cast(result.s_MemoryUsageWithMaxPartitions)); + CPPUNIT_ASSERT(arrayDoc.IsArray()); + CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(1), arrayDoc.Size()); - // no error should be registered - CPPUNIT_ASSERT_EQUAL(0, static_cast(errors.size())); - } + const rapidjson::Value& result = arrayDoc[rapidjson::SizeType(0)]; + CPPUNIT_ASSERT(result.IsObject()); - // Test estimation for data frame with 4 rows - { - errors.clear(); - std::string jsonSpec{api::CDataFrameAnalysisSpecificationJsonWriter::jsonString( - 4, 5, 100000000, 1, {}, true, test::CTestTmpDir::tmpDir(), "", "outlier_detection", "")}; - api::CDataFrameAnalysisSpecification spec{jsonSpec}; + CPPUNIT_ASSERT(result.HasMember("expected_memory_usage_with_one_partition")); + CPPUNIT_ASSERT_EQUAL(expected_expected_memory_usage_with_one_partition, + result["expected_memory_usage_with_one_partition"].GetInt64()); + CPPUNIT_ASSERT(result.HasMember("expected_memory_usage_with_max_partitions")); + CPPUNIT_ASSERT_EQUAL(expected_expected_memory_usage_with_max_partitions, + result["expected_memory_usage_with_max_partitions"].GetInt64()); - // Check memory estimation result - api::SMemoryUsageEstimationResult result = spec.estimateMemoryUsage(); - CPPUNIT_ASSERT_EQUAL(9104, static_cast(result.s_MemoryUsageWithOnePartition)); - CPPUNIT_ASSERT_EQUAL(8528, static_cast(result.s_MemoryUsageWithMaxPartitions)); + CPPUNIT_ASSERT_EQUAL(expected_number_errors, static_cast(errors.size())); +} - // no error should be registered - CPPUNIT_ASSERT_EQUAL(0, static_cast(errors.size())); - } +void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_0() { + testEstimateMemoryUsage(0, 0, 0, 1); +} + +void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_1() { + testEstimateMemoryUsage(1, 6050, 6050, 0); +} + +void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_10() { + testEstimateMemoryUsage(10, 15128, 13112, 0); +} + +void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_100() { + testEstimateMemoryUsage(100, 63056, 35516, 0); +} + +void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_1000() { + testEstimateMemoryUsage(1000, 460328, 146372, 0); } CppUnit::Test* CDataFrameAnalysisRunnerTest::suite() { @@ -193,8 +198,20 @@ CppUnit::Test* CDataFrameAnalysisRunnerTest::suite() { "CDataFrameAnalysisRunnerTest::testComputeAndSaveExecutionStrategyDiskUsageFlag", &CDataFrameAnalysisRunnerTest::testComputeAndSaveExecutionStrategyDiskUsageFlag)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage", - &CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage)); + "CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_0", + &CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_0)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_1", + &CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_1)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_10", + &CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_10)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_100", + &CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_100)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_1000", + &CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_1000)); return suiteOfTests; } diff --git a/lib/api/unittest/CDataFrameAnalysisRunnerTest.h b/lib/api/unittest/CDataFrameAnalysisRunnerTest.h index 20aa2c5f09..2b3b73d9f0 100644 --- a/lib/api/unittest/CDataFrameAnalysisRunnerTest.h +++ b/lib/api/unittest/CDataFrameAnalysisRunnerTest.h @@ -13,7 +13,11 @@ class CDataFrameAnalysisRunnerTest : public CppUnit::TestFixture { public: void testComputeExecutionStrategyForOutliers(); void testComputeAndSaveExecutionStrategyDiskUsageFlag(); - void testEstimateMemoryUsage(); + void testEstimateMemoryUsage_0(); + void testEstimateMemoryUsage_1(); + void testEstimateMemoryUsage_10(); + void testEstimateMemoryUsage_100(); + void testEstimateMemoryUsage_1000(); static CppUnit::Test* suite(); diff --git a/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc b/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc index 4194f67e4d..52ac327cef 100644 --- a/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc +++ b/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc @@ -22,7 +22,8 @@ using namespace api; CppUnit::Test* CMemoryUsageEstimationResultJsonWriterTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMemoryUsageEstimationResultJsonWriterTest"); suiteOfTests->addTest(new CppUnit::TestCaller( - "CMemoryUsageEstimationResultJsonWriterTest::testWrite", &CMemoryUsageEstimationResultJsonWriterTest::testWrite)); + "CMemoryUsageEstimationResultJsonWriterTest::testWrite", + &CMemoryUsageEstimationResultJsonWriterTest::testWrite)); return suiteOfTests; } @@ -33,8 +34,7 @@ void CMemoryUsageEstimationResultJsonWriterTest::testWrite() { { core::CJsonOutputStreamWrapper wrappedOutStream(sstream); CMemoryUsageEstimationResultJsonWriter writer(wrappedOutStream); - SMemoryUsageEstimationResult result(2000, 1000); - writer.write(result); + writer.write(static_cast(2000), static_cast(1000)); } rapidjson::Document arrayDoc; @@ -46,8 +46,8 @@ void CMemoryUsageEstimationResultJsonWriterTest::testWrite() { const rapidjson::Value& object = arrayDoc[rapidjson::SizeType(0)]; CPPUNIT_ASSERT(object.IsObject()); - CPPUNIT_ASSERT(object.HasMember("memory_usage_with_one_partition")); - CPPUNIT_ASSERT_EQUAL(int64_t(2000), object["memory_usage_with_one_partition"].GetInt64()); - CPPUNIT_ASSERT(object.HasMember("memory_usage_with_max_partitions")); - CPPUNIT_ASSERT_EQUAL(int64_t(1000), object["memory_usage_with_max_partitions"].GetInt64()); + CPPUNIT_ASSERT(object.HasMember("expected_memory_usage_with_one_partition")); + CPPUNIT_ASSERT_EQUAL(int64_t(2000), object["expected_memory_usage_with_one_partition"].GetInt64()); + CPPUNIT_ASSERT(object.HasMember("expected_memory_usage_with_max_partitions")); + CPPUNIT_ASSERT_EQUAL(int64_t(1000), object["expected_memory_usage_with_max_partitions"].GetInt64()); } From 8ab959d9f43b34e3d2cfef364e36dfe605fa2838 Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Wed, 7 Aug 2019 12:50:05 +0200 Subject: [PATCH 3/6] Apply review comments --- include/api/CDataFrameAnalysisRunner.h | 2 +- include/api/CDataFrameAnalysisSpecification.h | 4 ++-- include/api/CMemoryUsageEstimationResultJsonWriter.h | 2 +- lib/api/CDataFrameAnalysisSpecification.cc | 9 ++++++--- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/api/CDataFrameAnalysisRunner.h b/include/api/CDataFrameAnalysisRunner.h index 8aa84d2a49..79c8fb8b39 100644 --- a/include/api/CDataFrameAnalysisRunner.h +++ b/include/api/CDataFrameAnalysisRunner.h @@ -78,7 +78,7 @@ class API_EXPORT CDataFrameAnalysisRunner { //! Estimates memory usage in two cases: one partition (the whole data frame //! fits in main memory) and maximum tolerable number of partitions (only - //! one partition needs to be loaded to main memory. + //! one partition needs to be loaded to main memory). void estimateMemoryUsage(CMemoryUsageEstimationResultJsonWriter& writer) const; //! Check if the data frame for this analysis should use in or out of core diff --git a/include/api/CDataFrameAnalysisSpecification.h b/include/api/CDataFrameAnalysisSpecification.h index b2e2c85f3c..e4dd33de1e 100644 --- a/include/api/CDataFrameAnalysisSpecification.h +++ b/include/api/CDataFrameAnalysisSpecification.h @@ -153,8 +153,8 @@ class API_EXPORT CDataFrameAnalysisSpecification { //! Estimates memory usage in two cases: one partition (the whole data frame //! fits in main memory) and maximum tolerable number of partitions (only - //! one partition needs to be loaded to main memory. - void estimateMemoryUsage(CMemoryUsageEstimationResultJsonWriter& resultWriter) const; + //! one partition needs to be loaded to main memory). + void estimateMemoryUsage(CMemoryUsageEstimationResultJsonWriter& writer) const; private: void initializeRunner(const rapidjson::Value& jsonAnalysis); diff --git a/include/api/CMemoryUsageEstimationResultJsonWriter.h b/include/api/CMemoryUsageEstimationResultJsonWriter.h index 82cd2b3160..51d24bde55 100644 --- a/include/api/CMemoryUsageEstimationResultJsonWriter.h +++ b/include/api/CMemoryUsageEstimationResultJsonWriter.h @@ -25,7 +25,7 @@ namespace api { //! class API_EXPORT CMemoryUsageEstimationResultJsonWriter : private core::CNonCopyable { public: - //! Constructor that causes output to be written to the specified wrapped stream + //! \param[in] strmOut The wrapped stream to which to write output. CMemoryUsageEstimationResultJsonWriter(core::CJsonOutputStreamWrapper& strmOut); //! Writes the given memory usage estimation result in JSON format. diff --git a/lib/api/CDataFrameAnalysisSpecification.cc b/lib/api/CDataFrameAnalysisSpecification.cc index f1310a8873..629e5d2cbe 100644 --- a/lib/api/CDataFrameAnalysisSpecification.cc +++ b/lib/api/CDataFrameAnalysisSpecification.cc @@ -184,9 +184,12 @@ CDataFrameAnalysisRunner* CDataFrameAnalysisSpecification::run(core::CDataFrame& return nullptr; } -void CDataFrameAnalysisSpecification::estimateMemoryUsage(CMemoryUsageEstimationResultJsonWriter& resultWriter) const { - // TODO: What to do if m_Runner is nullptr? - m_Runner->estimateMemoryUsage(resultWriter); +void CDataFrameAnalysisSpecification::estimateMemoryUsage(CMemoryUsageEstimationResultJsonWriter& writer) const { + if (m_Runner == nullptr) { + HANDLE_FATAL(<< "Internal error: no runner available so can't estimate memory. Please report this problem."); + return; + } + m_Runner->estimateMemoryUsage(writer); } void CDataFrameAnalysisSpecification::initializeRunner(const rapidjson::Value& jsonAnalysis) { From 9c2d0d4d5348d800b721c3395f399904ce82aecb Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Wed, 7 Aug 2019 13:09:59 +0200 Subject: [PATCH 4/6] Run clang-format --- bin/data_frame_analyzer/Main.cc | 5 +++-- include/api/CMemoryUsageEstimationResultJsonWriter.h | 2 +- lib/api/CDataFrameAnalysisRunner.cc | 7 +++---- lib/api/CMemoryUsageEstimationResultJsonWriter.cc | 4 ++-- lib/api/unittest/CDataFrameAnalysisRunnerTest.cc | 7 ++++--- .../CMemoryUsageEstimationResultJsonWriterTest.cc | 9 ++++++--- 6 files changed, 19 insertions(+), 15 deletions(-) diff --git a/bin/data_frame_analyzer/Main.cc b/bin/data_frame_analyzer/Main.cc index b0a8cb69fe..94fa39fcb0 100644 --- a/bin/data_frame_analyzer/Main.cc +++ b/bin/data_frame_analyzer/Main.cc @@ -96,8 +96,9 @@ int main(int argc, char** argv) { std::string outputFileName; bool isOutputFileNamedPipe(false); if (ml::data_frame_analyzer::CCmdLineParser::parse( - argc, argv, configFile, memoryUsageEstimationOnly, logProperties, logPipe, lengthEncodedInput, inputFileName, - isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe) == false) { + argc, argv, configFile, memoryUsageEstimationOnly, logProperties, + logPipe, lengthEncodedInput, inputFileName, isInputFileNamedPipe, + outputFileName, isOutputFileNamedPipe) == false) { return EXIT_FAILURE; } diff --git a/include/api/CMemoryUsageEstimationResultJsonWriter.h b/include/api/CMemoryUsageEstimationResultJsonWriter.h index 51d24bde55..5c07c903f9 100644 --- a/include/api/CMemoryUsageEstimationResultJsonWriter.h +++ b/include/api/CMemoryUsageEstimationResultJsonWriter.h @@ -30,7 +30,7 @@ class API_EXPORT CMemoryUsageEstimationResultJsonWriter : private core::CNonCopy //! Writes the given memory usage estimation result in JSON format. void write(size_t expectedMemoryUsageWithOnePartition, - size_t expectedMemoryUsageWithMaxPartitions); + size_t expectedMemoryUsageWithMaxPartitions); private: //! JSON line writer diff --git a/lib/api/CDataFrameAnalysisRunner.cc b/lib/api/CDataFrameAnalysisRunner.cc index c1c13de8f7..662dba2fb0 100644 --- a/lib/api/CDataFrameAnalysisRunner.cc +++ b/lib/api/CDataFrameAnalysisRunner.cc @@ -54,8 +54,8 @@ void CDataFrameAnalysisRunner::estimateMemoryUsage(CMemoryUsageEstimationResultJ } std::size_t expectedMemoryUsageWithOnePartition{ this->estimateMemoryUsage(numberRows, numberRows, numberColumns)}; - std::size_t expectedMemoryUsageWithMaxPartitions{ - this->estimateMemoryUsage(numberRows, numberRows / maxNumberPartitions, numberColumns)}; + std::size_t expectedMemoryUsageWithMaxPartitions{this->estimateMemoryUsage( + numberRows, numberRows / maxNumberPartitions, numberColumns)}; writer.write(expectedMemoryUsageWithOnePartition, expectedMemoryUsageWithMaxPartitions); } @@ -73,8 +73,7 @@ void CDataFrameAnalysisRunner::computeAndSaveExecutionStrategy() { std::size_t maxNumberPartitions{maximumNumberPartitions(m_Spec)}; std::size_t memoryUsage{0}; - for (m_NumberPartitions = 1; m_NumberPartitions < maxNumberPartitions; - ++m_NumberPartitions) { + for (m_NumberPartitions = 1; m_NumberPartitions < maxNumberPartitions; ++m_NumberPartitions) { std::size_t partitionNumberRows{numberRows / m_NumberPartitions}; memoryUsage = this->estimateMemoryUsage(numberRows, partitionNumberRows, numberColumns); LOG_TRACE(<< "partition number rows = " << partitionNumberRows); diff --git a/lib/api/CMemoryUsageEstimationResultJsonWriter.cc b/lib/api/CMemoryUsageEstimationResultJsonWriter.cc index 43e610092f..46869fcfc3 100644 --- a/lib/api/CMemoryUsageEstimationResultJsonWriter.cc +++ b/lib/api/CMemoryUsageEstimationResultJsonWriter.cc @@ -15,8 +15,8 @@ const std::string EXPECTED_MEMORY_USAGE_WITH_ONE_PARTITION("expected_memory_usag const std::string EXPECTED_MEMORY_USAGE_WITH_MAX_PARTITIONS("expected_memory_usage_with_max_partitions"); } -CMemoryUsageEstimationResultJsonWriter::CMemoryUsageEstimationResultJsonWriter( - core::CJsonOutputStreamWrapper& strmOut) : m_Writer(strmOut) { +CMemoryUsageEstimationResultJsonWriter::CMemoryUsageEstimationResultJsonWriter(core::CJsonOutputStreamWrapper& strmOut) + : m_Writer(strmOut) { // Don't write any output in the constructor because, the way things work at // the moment, the output stream might be redirected after construction } diff --git a/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc b/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc index 2d7eb6b02e..389af06386 100644 --- a/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc +++ b/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc @@ -123,7 +123,7 @@ void CDataFrameAnalysisRunnerTest::testComputeAndSaveExecutionStrategyDiskUsageF } void testEstimateMemoryUsage(int64_t numberRows, - int64_t expected_expected_memory_usage_with_one_partition, + int64_t expected_expected_memory_usage_with_one_partition, int64_t expected_expected_memory_usage_with_max_partitions, int expected_number_errors) { @@ -140,7 +140,8 @@ void testEstimateMemoryUsage(int64_t numberRows, // The output writer won't close the JSON structures until is is destroyed { std::string jsonSpec{api::CDataFrameAnalysisSpecificationJsonWriter::jsonString( - numberRows, 5, 100000000, 1, {}, true, test::CTestTmpDir::tmpDir(), "", "outlier_detection", "")}; + numberRows, 5, 100000000, 1, {}, true, test::CTestTmpDir::tmpDir(), + "", "outlier_detection", "")}; api::CDataFrameAnalysisSpecification spec{jsonSpec}; core::CJsonOutputStreamWrapper wrappedOutStream(sstream); @@ -162,7 +163,7 @@ void testEstimateMemoryUsage(int64_t numberRows, CPPUNIT_ASSERT_EQUAL(expected_expected_memory_usage_with_one_partition, result["expected_memory_usage_with_one_partition"].GetInt64()); CPPUNIT_ASSERT(result.HasMember("expected_memory_usage_with_max_partitions")); - CPPUNIT_ASSERT_EQUAL(expected_expected_memory_usage_with_max_partitions, + CPPUNIT_ASSERT_EQUAL(expected_expected_memory_usage_with_max_partitions, result["expected_memory_usage_with_max_partitions"].GetInt64()); CPPUNIT_ASSERT_EQUAL(expected_number_errors, static_cast(errors.size())); diff --git a/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc b/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc index 52ac327cef..29366d8ff4 100644 --- a/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc +++ b/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc @@ -20,7 +20,8 @@ using namespace ml; using namespace api; CppUnit::Test* CMemoryUsageEstimationResultJsonWriterTest::suite() { - CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CMemoryUsageEstimationResultJsonWriterTest"); + CppUnit::TestSuite* suiteOfTests = + new CppUnit::TestSuite("CMemoryUsageEstimationResultJsonWriterTest"); suiteOfTests->addTest(new CppUnit::TestCaller( "CMemoryUsageEstimationResultJsonWriterTest::testWrite", &CMemoryUsageEstimationResultJsonWriterTest::testWrite)); @@ -47,7 +48,9 @@ void CMemoryUsageEstimationResultJsonWriterTest::testWrite() { CPPUNIT_ASSERT(object.IsObject()); CPPUNIT_ASSERT(object.HasMember("expected_memory_usage_with_one_partition")); - CPPUNIT_ASSERT_EQUAL(int64_t(2000), object["expected_memory_usage_with_one_partition"].GetInt64()); + CPPUNIT_ASSERT_EQUAL(int64_t(2000), + object["expected_memory_usage_with_one_partition"].GetInt64()); CPPUNIT_ASSERT(object.HasMember("expected_memory_usage_with_max_partitions")); - CPPUNIT_ASSERT_EQUAL(int64_t(1000), object["expected_memory_usage_with_max_partitions"].GetInt64()); + CPPUNIT_ASSERT_EQUAL(int64_t(1000), + object["expected_memory_usage_with_max_partitions"].GetInt64()); } From bb26660421abda4f7c7de77c676463ff7b878ee3 Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Wed, 7 Aug 2019 13:33:45 +0200 Subject: [PATCH 5/6] Round up results to the nearest kilobyte --- lib/api/CDataFrameAnalysisRunner.cc | 6 +++++- lib/api/unittest/CDataFrameAnalysisRunnerTest.cc | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/api/CDataFrameAnalysisRunner.cc b/lib/api/CDataFrameAnalysisRunner.cc index 662dba2fb0..a56fe91d2a 100644 --- a/lib/api/CDataFrameAnalysisRunner.cc +++ b/lib/api/CDataFrameAnalysisRunner.cc @@ -56,7 +56,11 @@ void CDataFrameAnalysisRunner::estimateMemoryUsage(CMemoryUsageEstimationResultJ this->estimateMemoryUsage(numberRows, numberRows, numberColumns)}; std::size_t expectedMemoryUsageWithMaxPartitions{this->estimateMemoryUsage( numberRows, numberRows / maxNumberPartitions, numberColumns)}; - writer.write(expectedMemoryUsageWithOnePartition, expectedMemoryUsageWithMaxPartitions); + auto roundUpToNearestKilobyte = [](std::size_t bytes) { + return 1024 * ((bytes + 1024 - 1) / 1024); + }; + writer.write(roundUpToNearestKilobyte(expectedMemoryUsageWithOnePartition), + roundUpToNearestKilobyte(expectedMemoryUsageWithMaxPartitions)); } void CDataFrameAnalysisRunner::computeAndSaveExecutionStrategy() { diff --git a/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc b/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc index 389af06386..cbe52bca47 100644 --- a/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc +++ b/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc @@ -174,19 +174,19 @@ void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_0() { } void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_1() { - testEstimateMemoryUsage(1, 6050, 6050, 0); + testEstimateMemoryUsage(1, 6144, 6144, 0); } void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_10() { - testEstimateMemoryUsage(10, 15128, 13112, 0); + testEstimateMemoryUsage(10, 15360, 13312, 0); } void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_100() { - testEstimateMemoryUsage(100, 63056, 35516, 0); + testEstimateMemoryUsage(100, 63488, 35840, 0); } void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_1000() { - testEstimateMemoryUsage(1000, 460328, 146372, 0); + testEstimateMemoryUsage(1000, 460800, 146432, 0); } CppUnit::Test* CDataFrameAnalysisRunnerTest::suite() { From 6a8d9009da273b77adabe6a52f60da59bba5ad01 Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Wed, 7 Aug 2019 14:58:10 +0200 Subject: [PATCH 6/6] Emit string values (representing kilobytes, with "kB" suffix) rather than long values (representing bytes). --- .../CMemoryUsageEstimationResultJsonWriter.h | 4 ++-- lib/api/CDataFrameAnalysisRunner.cc | 4 ++-- .../CMemoryUsageEstimationResultJsonWriter.cc | 8 +++---- .../unittest/CDataFrameAnalysisRunnerTest.cc | 24 ++++++++++--------- ...moryUsageEstimationResultJsonWriterTest.cc | 12 ++++++---- 5 files changed, 28 insertions(+), 24 deletions(-) diff --git a/include/api/CMemoryUsageEstimationResultJsonWriter.h b/include/api/CMemoryUsageEstimationResultJsonWriter.h index 5c07c903f9..86ed172451 100644 --- a/include/api/CMemoryUsageEstimationResultJsonWriter.h +++ b/include/api/CMemoryUsageEstimationResultJsonWriter.h @@ -29,8 +29,8 @@ class API_EXPORT CMemoryUsageEstimationResultJsonWriter : private core::CNonCopy CMemoryUsageEstimationResultJsonWriter(core::CJsonOutputStreamWrapper& strmOut); //! Writes the given memory usage estimation result in JSON format. - void write(size_t expectedMemoryUsageWithOnePartition, - size_t expectedMemoryUsageWithMaxPartitions); + void write(const std::string& expectedMemoryUsageWithOnePartition, + const std::string& expectedMemoryUsageWithMaxPartitions); private: //! JSON line writer diff --git a/lib/api/CDataFrameAnalysisRunner.cc b/lib/api/CDataFrameAnalysisRunner.cc index a56fe91d2a..b7934fa7e0 100644 --- a/lib/api/CDataFrameAnalysisRunner.cc +++ b/lib/api/CDataFrameAnalysisRunner.cc @@ -49,7 +49,7 @@ void CDataFrameAnalysisRunner::estimateMemoryUsage(CMemoryUsageEstimationResultJ std::size_t numberColumns{m_Spec.numberColumns() + this->numberExtraColumns()}; std::size_t maxNumberPartitions{maximumNumberPartitions(m_Spec)}; if (maxNumberPartitions == 0) { - writer.write(0, 0); + writer.write("0", "0"); return; } std::size_t expectedMemoryUsageWithOnePartition{ @@ -57,7 +57,7 @@ void CDataFrameAnalysisRunner::estimateMemoryUsage(CMemoryUsageEstimationResultJ std::size_t expectedMemoryUsageWithMaxPartitions{this->estimateMemoryUsage( numberRows, numberRows / maxNumberPartitions, numberColumns)}; auto roundUpToNearestKilobyte = [](std::size_t bytes) { - return 1024 * ((bytes + 1024 - 1) / 1024); + return std::to_string((bytes + 1024 - 1) / 1024) + "kB"; }; writer.write(roundUpToNearestKilobyte(expectedMemoryUsageWithOnePartition), roundUpToNearestKilobyte(expectedMemoryUsageWithMaxPartitions)); diff --git a/lib/api/CMemoryUsageEstimationResultJsonWriter.cc b/lib/api/CMemoryUsageEstimationResultJsonWriter.cc index 46869fcfc3..8b4a3fda89 100644 --- a/lib/api/CMemoryUsageEstimationResultJsonWriter.cc +++ b/lib/api/CMemoryUsageEstimationResultJsonWriter.cc @@ -21,13 +21,13 @@ CMemoryUsageEstimationResultJsonWriter::CMemoryUsageEstimationResultJsonWriter(c // the moment, the output stream might be redirected after construction } -void CMemoryUsageEstimationResultJsonWriter::write(std::size_t expectedMemoryUsageWithOnePartition, - std::size_t expectedMemoryUsageWithMaxPartitions) { +void CMemoryUsageEstimationResultJsonWriter::write(const std::string& expectedMemoryUsageWithOnePartition, + const std::string& expectedMemoryUsageWithMaxPartitions) { m_Writer.StartObject(); m_Writer.Key(EXPECTED_MEMORY_USAGE_WITH_ONE_PARTITION); - m_Writer.Uint64(expectedMemoryUsageWithOnePartition); + m_Writer.String(expectedMemoryUsageWithOnePartition); m_Writer.Key(EXPECTED_MEMORY_USAGE_WITH_MAX_PARTITIONS); - m_Writer.Uint64(expectedMemoryUsageWithMaxPartitions); + m_Writer.String(expectedMemoryUsageWithMaxPartitions); m_Writer.EndObject(); m_Writer.flush(); } diff --git a/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc b/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc index cbe52bca47..8fd6f16e92 100644 --- a/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc +++ b/lib/api/unittest/CDataFrameAnalysisRunnerTest.cc @@ -123,8 +123,8 @@ void CDataFrameAnalysisRunnerTest::testComputeAndSaveExecutionStrategyDiskUsageF } void testEstimateMemoryUsage(int64_t numberRows, - int64_t expected_expected_memory_usage_with_one_partition, - int64_t expected_expected_memory_usage_with_max_partitions, + const std::string& expected_expected_memory_usage_with_one_partition, + const std::string& expected_expected_memory_usage_with_max_partitions, int expected_number_errors) { std::ostringstream sstream; @@ -160,33 +160,35 @@ void testEstimateMemoryUsage(int64_t numberRows, CPPUNIT_ASSERT(result.IsObject()); CPPUNIT_ASSERT(result.HasMember("expected_memory_usage_with_one_partition")); - CPPUNIT_ASSERT_EQUAL(expected_expected_memory_usage_with_one_partition, - result["expected_memory_usage_with_one_partition"].GetInt64()); + CPPUNIT_ASSERT_EQUAL( + expected_expected_memory_usage_with_one_partition, + std::string(result["expected_memory_usage_with_one_partition"].GetString())); CPPUNIT_ASSERT(result.HasMember("expected_memory_usage_with_max_partitions")); - CPPUNIT_ASSERT_EQUAL(expected_expected_memory_usage_with_max_partitions, - result["expected_memory_usage_with_max_partitions"].GetInt64()); + CPPUNIT_ASSERT_EQUAL( + expected_expected_memory_usage_with_max_partitions, + std::string(result["expected_memory_usage_with_max_partitions"].GetString())); CPPUNIT_ASSERT_EQUAL(expected_number_errors, static_cast(errors.size())); } void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_0() { - testEstimateMemoryUsage(0, 0, 0, 1); + testEstimateMemoryUsage(0, "0", "0", 1); } void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_1() { - testEstimateMemoryUsage(1, 6144, 6144, 0); + testEstimateMemoryUsage(1, "6kB", "6kB", 0); } void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_10() { - testEstimateMemoryUsage(10, 15360, 13312, 0); + testEstimateMemoryUsage(10, "15kB", "13kB", 0); } void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_100() { - testEstimateMemoryUsage(100, 63488, 35840, 0); + testEstimateMemoryUsage(100, "62kB", "35kB", 0); } void CDataFrameAnalysisRunnerTest::testEstimateMemoryUsage_1000() { - testEstimateMemoryUsage(1000, 460800, 146432, 0); + testEstimateMemoryUsage(1000, "450kB", "143kB", 0); } CppUnit::Test* CDataFrameAnalysisRunnerTest::suite() { diff --git a/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc b/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc index 29366d8ff4..412429ef10 100644 --- a/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc +++ b/lib/api/unittest/CMemoryUsageEstimationResultJsonWriterTest.cc @@ -35,7 +35,7 @@ void CMemoryUsageEstimationResultJsonWriterTest::testWrite() { { core::CJsonOutputStreamWrapper wrappedOutStream(sstream); CMemoryUsageEstimationResultJsonWriter writer(wrappedOutStream); - writer.write(static_cast(2000), static_cast(1000)); + writer.write("16kB", "8kB"); } rapidjson::Document arrayDoc; @@ -48,9 +48,11 @@ void CMemoryUsageEstimationResultJsonWriterTest::testWrite() { CPPUNIT_ASSERT(object.IsObject()); CPPUNIT_ASSERT(object.HasMember("expected_memory_usage_with_one_partition")); - CPPUNIT_ASSERT_EQUAL(int64_t(2000), - object["expected_memory_usage_with_one_partition"].GetInt64()); + CPPUNIT_ASSERT_EQUAL( + std::string("16kB"), + std::string(object["expected_memory_usage_with_one_partition"].GetString())); CPPUNIT_ASSERT(object.HasMember("expected_memory_usage_with_max_partitions")); - CPPUNIT_ASSERT_EQUAL(int64_t(1000), - object["expected_memory_usage_with_max_partitions"].GetInt64()); + CPPUNIT_ASSERT_EQUAL( + std::string("8kB"), + std::string(object["expected_memory_usage_with_max_partitions"].GetString())); }